/*
 * Multiply x by y using the schoolbook method, one digit at a time
 * (digit 0 is the least significant one).
 *
 * The full product is accumulated in a scratch buffer that is twice as
 * wide as a ui64_t. The lower UI64_DIGITS digits are returned; if ov is
 * not NULL, the upper UI64_DIGITS digits are stored in *ov.
 */
ui64_t ui64_mul(ui64_t x, ui64_t y, ui64_t *ov)
{
    UIXX_T(UI64_DIGITS+UI64_DIGITS) prod;
    ui64_t low;
    int acc;
    int xi, yj, pos;

    /* start from an all-zero product */
    for (pos = 0; pos < (UI64_DIGITS+UI64_DIGITS); pos++) {
        prod.x[pos] = 0;
    }

    for (xi = 0; xi < UI64_DIGITS; xi++) {
        /* add x[xi] * y into the product, starting at digit xi */
        acc = 0;
        pos = xi;
        for (yj = 0; yj < UI64_DIGITS; yj++, pos++) {
            acc += (x.x[xi] * y.x[yj]) + prod.x[pos];
            prod.x[pos] = (acc % UI64_BASE);
            acc /= UI64_BASE;
        }

        /* let the leftover carry ripple through the higher digits */
        for ( ; pos < (UI64_DIGITS+UI64_DIGITS); pos++) {
            acc += prod.x[pos];
            prod.x[pos] = (acc % UI64_BASE);
            acc /= UI64_BASE;
        }
    }

    /* split the wide product into result (low) and overflow (high) */
    memcpy(low.x, prod.x, UI64_DIGITS);
    if (ov != NULL) {
        memcpy(ov->x, &prod.x[UI64_DIGITS], UI64_DIGITS);
    }

    return low;
}
/*
 * Shift x logically to the right by s bits.
 *
 * x is placed into the upper half of a double-width scratch buffer and
 * moved down, so the bits that fall off the low end of the result end up
 * in the lower half of the buffer. The upper half is returned; if ov is
 * not NULL, the lower half (the shifted-out bits) is stored in *ov.
 *
 * Special cases:
 *   s <= 0   x is returned unchanged, *ov is zero
 *   s == 64  zero is returned, *ov is x
 *   s >  64  zero is returned, *ov is zero
 */
ui64_t ui64_ror(ui64_t x, int s, ui64_t *ov)
{
    UIXX_T(UI64_DIGITS+UI64_DIGITS) wide;
    ui64_t res;
    int pos;
    int whole;    /* number of whole digits (bytes) to shift */
    int bits;     /* remaining bit count, 0..7 */
    int divisor;
    int rem;

    if (s <= 0) {
        /* nothing to shift */
        if (ov != NULL) {
            *ov = ui64_zero();
        }
        return x;
    }
    if (s > 64) {
        /* everything is shifted out and lost */
        if (ov != NULL) {
            *ov = ui64_zero();
        }
        return ui64_zero();
    }
    if (s == 64) {
        /* the whole value moves into the overflow */
        if (ov != NULL) {
            *ov = x;
        }
        return ui64_zero();
    }

    whole = s / 8;
    bits  = s % 8;

    /* byte-wise part: copy x into the upper half, moved down by 'whole' */
    for (pos = 0; pos < UI64_DIGITS+UI64_DIGITS; pos++) {
        wide.x[pos] = 0;
    }
    for (pos = 0; pos < UI64_DIGITS; pos++) {
        wide.x[UI64_DIGITS + pos - whole] = x.x[pos];
    }

    /* bit-wise part: divide the wide value by 2^bits, top digit first */
    if (bits > 0) {
        divisor = (1 << bits);
        rem = 0;
        pos = UI64_DIGITS+UI64_DIGITS;
        while (pos-- > 0) {
            rem = (rem * UI64_BASE) + wide.x[pos];
            wide.x[pos] = (rem / divisor);
            rem %= divisor;
        }
    }

    memcpy(res.x, &wide.x[UI64_DIGITS], UI64_DIGITS);
    if (ov != NULL) {
        memcpy(ov->x, wide.x, UI64_DIGITS);
    }
    return res;
}
/*
 * Shift x logically to the left by s bits.
 *
 * x is placed into the lower half of a double-width scratch buffer and
 * moved up, so the bits that leave the top of the result end up in the
 * upper half of the buffer. The lower half is returned; if ov is not
 * NULL, the upper half (the shifted-out bits) is stored in *ov.
 *
 * Special cases:
 *   s <= 0    x is returned unchanged, *ov is zero
 *   s == 128  zero is returned, *ov is x
 *   s >  128  zero is returned, *ov is zero
 */
ui128_t ui128_rol(ui128_t x, int s, ui128_t *ov)
{
    UIXX_T(UI128_DIGITS+UI128_DIGITS) wide;
    ui128_t res;
    int pos;
    int whole;    /* number of whole digits (bytes) to shift */
    int bits;     /* remaining bit count, 0..7 */
    int factor;
    int acc;

    if (s <= 0) {
        /* nothing to shift */
        if (ov != NULL) {
            *ov = ui128_zero();
        }
        return x;
    }
    if (s > 128) {
        /* everything is shifted out and lost */
        if (ov != NULL) {
            *ov = ui128_zero();
        }
        return ui128_zero();
    }
    if (s == 128) {
        /* the whole value moves into the overflow */
        if (ov != NULL) {
            *ov = x;
        }
        return ui128_zero();
    }

    whole = s / 8;
    bits  = s % 8;

    /* byte-wise part: copy x into the buffer, moved up by 'whole' */
    for (pos = 0; pos < UI128_DIGITS+UI128_DIGITS; pos++) {
        wide.x[pos] = 0;
    }
    for (pos = 0; pos < UI128_DIGITS; pos++) {
        wide.x[pos + whole] = x.x[pos];
    }

    /* bit-wise part: multiply the wide value by 2^bits, low digit first */
    if (bits > 0) {
        factor = (1 << bits);
        acc = 0;
        for (pos = 0; pos < UI128_DIGITS+UI128_DIGITS; pos++) {
            acc += (wide.x[pos] * factor);
            wide.x[pos] = (acc % UI128_BASE);
            acc /= UI128_BASE;
        }
    }

    memcpy(res.x, wide.x, UI128_DIGITS);
    if (ov != NULL) {
        memcpy(ov->x, &wide.x[UI128_DIGITS], UI128_DIGITS);
    }
    return res;
}
/*
 * Worked example of the long division performed by ui64_div(), shown
 * with decimal digits for readability:
 *
 *              = 2078 [q]
 *    0615367 [x] : 296 [y]
 *   -0592     [dq]
 *   -----
 *  = 0233
 *   -0000     [dq]
 *    -----
 *  =  2336
 *    -2072    [dq]
 *     -----
 *  =   2647
 *     -2368   [dq]
 *      -----
 *  =    279   [r]
 */

/*
 * Divide x by y.
 *
 * Returns the quotient; if ov is not NULL the remainder is stored in
 * *ov. A division by zero yields a zero quotient and a zero remainder.
 *
 * Three cases are distinguished by the digit counts n and m that
 * ui64_len() reports for x and y:
 *   m == 1      delegated to ui64_divn() (single-digit divisor)
 *   n <  m      quotient is zero, remainder is x
 *   otherwise   schoolbook long division, one quotient digit per step
 */
ui64_t ui64_div(ui64_t x, ui64_t y, ui64_t *ov)
{
    ui64_t q;
    ui64_t r;
    int i;
    int n, m;
    int ovn;

    /* determine actual number of involved digits */
    n = ui64_len(x);
    m = ui64_len(y);

    if (m == 1) {
        /* simple case #1: reducible to ui64_divn() */
        if (y.x[0] == 0) {
            /* error case: division by zero! */
            ui64_fill(q, 0);
            ui64_fill(r, 0);
        }
        else {
            q = ui64_divn(x, y.x[0], &ovn);
            ui64_fill(r, 0);
            r.x[0] = (unsigned char)ovn;
        }
    }
    else if (n < m) {
        /* simple case #2: everything is in the remainder */
        ui64_fill(q, 0);
        r = x;
    }
    else { /* n >= m, m > 1 */
        /* standard case: x[0..n] / y[0..m] */
        UIXX_T(UI64_DIGITS+1) rx;
        UIXX_T(UI64_DIGITS+1) dq;
        ui64_t t;
        int km;
        int k;
        int qk;
        unsigned long y2;
        unsigned long r3;
        int borrow;
        int d;

        /* rx is x with a leading zero in order to make
           sure that n > m and not just n >= m */
        memcpy(rx.x, x.x, UI64_DIGITS);
        rx.x[UI64_DIGITS] = 0;

        for (k = n - m; k >= 0; k--) {
            /* efficiently compute qk by guessing
               qk := rx[k+m-2...k+m]/y[m-2...m-1] */
            km = k + m;
            y2 = (y.x[m-1]*UI64_BASE) + y.x[m-2];
            r3 = (rx.x[km]*(UI64_BASE*UI64_BASE)) +
                 (rx.x[km-1]*UI64_BASE) + rx.x[km-2];
            qk = r3 / y2;
            if (qk >= UI64_BASE)
                qk = UI64_BASE - 1;

            /* dq := y*qk (post-adjust qk if guessed incorrectly).
               The carry reported by ui64_muln() belongs to digit
               UI64_DIGITS of the product, not to digit m: for
               m < UI64_DIGITS digit m is already part of t, and
               overwriting it with the carry would drop the top digit
               of dq and break the comparison below.
               NOTE(review): assumes ui64_muln() multiplies all
               UI64_DIGITS digits and reports only the carry out of
               the top digit -- confirm against its definition. */
            t = ui64_muln(y, qk, &ovn);
            memcpy(dq.x, t.x, UI64_DIGITS);
            dq.x[UI64_DIGITS] = (unsigned char)ovn;

            /* compare rx[k..k+m] with dq[0..m], most significant
               digit first; stop at the first differing digit */
            for (i = m; i > 0; i--)
                if (rx.x[i+k] != dq.x[i])
                    break;
            if (rx.x[i+k] < dq.x[i]) {
                /* guess was one too large: redo product with qk-1 */
                t = ui64_muln(y, --qk, &ovn);
                memcpy(dq.x, t.x, UI64_DIGITS);
                dq.x[UI64_DIGITS] = (unsigned char)ovn;
            }

            /* store qk */
            q.x[k] = (unsigned char)qk;

            /* rx := rx - dq*(b^k) */
            borrow = 0;
            for (i = 0; i < m+1; i++) {
                d = ((rx.x[k+i] + UI64_BASE) - borrow - dq.x[i]);
                rx.x[k+i] = (d % UI64_BASE);
                borrow = (1 - (d/UI64_BASE));
            }
        }
        memcpy(r.x, rx.x, m);

        /* fill out results with leading zeros */
        for (i = n-m+1; i < UI64_DIGITS; i++)
            q.x[i] = 0;
        for (i = m; i < UI64_DIGITS; i++)
            r.x[i] = 0;
    }

    /* provide results */
    if (ov != NULL)
        *ov = r;
    return q;
}