/*
 *  Fixed-point style rounding helper: store in 'btmp' the value of
 *  'atmp' rounded with a digit count derived from 'places'.
 *
 *  The count handed to m_apm_round is (exponent of atmp + places - 1).
 *  Three cases follow from that count:
 *
 *    count >= 0   the real work is delegated to m_apm_round.
 *    count == -1  the first stored digit is the one being rounded
 *                 away; if the leading byte is >= 50 the result is a
 *                 single digit 1 one exponent position higher,
 *                 otherwise the result is 0.
 *    count <  -1  the result is 0.
 *
 *  NOTE(review): 'btmp' is zeroed before 'atmp' is consulted again, so
 *  the two arguments presumably must not be the same object - confirm
 *  against callers.
 */
void M_apm_round_fixpt(M_APM btmp, int places, M_APM atmp)
{
    int round_count = atmp->m_apm_exponent + places - 1;

    /* assume the number is too small, so the net result is 0 */
    M_set_to_zero(btmp);

    if (round_count >= 0) {
        m_apm_round(btmp, round_count, atmp);
        return;
    }

    /* anything beyond the "next digit" position cannot round up */
    if (round_count != -1)
        return;

    /* next digit is significant; leading byte < 50 means digit < 5 */
    if (atmp->m_apm_data[0] < 50)
        return;

    /* round up: keep the remaining fields of atmp, force the mantissa to '1' */
    m_apm_copy(btmp, atmp);
    btmp->m_apm_data[0]    = 10;
    btmp->m_apm_exponent  += 1;
    btmp->m_apm_datalength = 1;
    M_apm_normalize(btmp);
}
/*
 *  Integer division: rr = aa / bb with the fractional part discarded.
 *
 *  If either operand carries its error flag, rr is set to the error
 *  state and nothing else is done.
 *
 *  M_apm_sdivide must be used here. The faster divide function using
 *  the reciprocal will round the result (possibly changing
 *  nnm.999999... --> nn(m+1).0000), which would invalidate the
 *  'integer_divide' goal.
 */
void m_apm_integer_divide(M_APM rr, M_APM aa, M_APM bb)
{
    int integer_digits;

    if (aa->m_apm_error || bb->m_apm_error) {
        M_set_to_error(rr);
        return;
    }

    M_apm_sdivide(rr, 4, aa, bb);

    integer_digits = rr->m_apm_exponent;

    /* exponent <= 0: the quotient has no integer part, result is 0 */
    if (integer_digits <= 0) {
        M_set_to_zero(rr);
        return;
    }

    /* drop every digit stored beyond the integer part */
    if (rr->m_apm_datalength > integer_digits) {
        rr->m_apm_datalength = integer_digits;
        M_apm_normalize(rr);
    }
}
/*
 *  Floor: store in 'bb' the nearest integer <= 'aa'.
 *
 *  'aa' is first copied into 'bb' and all further work is done on 'bb':
 *
 *    - already an integer        -> unchanged
 *    - exponent <= 0 (|bb| < 1)  -> -1 if negative, otherwise 0
 *    - positive                  -> fraction digits are cut off
 *    - negative                  -> -(truncated magnitude + 1)
 */
void m_apm_floor(M_APM bb, M_APM aa)
{
    M_APM magnitude;
    int   is_negative;

    m_apm_copy(bb, aa);

    if (m_apm_is_integer(bb))       /* nothing to do for an integer */
        return;

    is_negative = (bb->m_apm_sign < 0);

    if (bb->m_apm_exponent <= 0) {  /* |bb| < 1, result is -1 or 0 */
        if (is_negative)
            m_apm_negate(bb, MM_One);
        else
            M_set_to_zero(bb);

        return;
    }

    if (!is_negative) {
        /* positive: keeping only the integer digits is the floor */
        bb->m_apm_datalength = bb->m_apm_exponent;
        M_apm_normalize(bb);
        return;
    }

    /*
     *  negative and not an integer: truncate the magnitude, add one,
     *  then put the minus sign back.
     */
    magnitude = M_get_stack_var();
    m_apm_negate(magnitude, bb);

    magnitude->m_apm_datalength = magnitude->m_apm_exponent;
    M_apm_normalize(magnitude);

    m_apm_add(bb, magnitude, MM_One);
    bb->m_apm_sign = -1;

    M_restore_stack(1);
}
/*
 *  Multiply two 'big' numbers: rr = aa * bb.
 *
 *  The operands are copied into the work numbers M_ain / M_bin, padded
 *  to a common power-of-2 size, and multiplied by either the FFT routine
 *  or the divide-and-conquer routine. Sign and exponent of the product
 *  are computed up front from the copies and stored after the raw
 *  multiplication, followed by a normalize.
 *
 *  The first call performs one-time setup: it clears
 *  mul_stack_data_size[], records the size of an int and derives
 *  bit_limit from it, and allocates the two work numbers.
 */
void M_fast_multiply(M_APM rr, M_APM aa, M_APM bb)
{
    void *new_data;
    int   half_size, result_bytes, slot, product_exp, product_sign;

    if (M_firsttimef) {
        M_firsttimef = FALSE;

        for (slot = 0; slot < M_STACK_SIZE; slot++)
            mul_stack_data_size[slot] = 0;

        size_flag = M_get_sizeof_int();
        bit_limit = 8 * size_flag + 1;

        M_ain = m_apm_init();
        M_bin = m_apm_init();
    }

    exp_stack_ptr   = -1;
    M_mul_stack_ptr = -1;

    m_apm_copy(M_ain, aa);
    m_apm_copy(M_bin, bb);

    product_sign = M_ain->m_apm_sign * M_bin->m_apm_sign;
    product_exp  = M_ain->m_apm_exponent + M_bin->m_apm_exponent;

    /* size everything from the longer of the two operands */
    half_size = (M_ain->m_apm_datalength >= M_bin->m_apm_datalength)
                    ? M_ain->m_apm_datalength
                    : M_bin->m_apm_datalength;

    half_size = (half_size + 1) >> 1;
    half_size = M_next_power_of_2(half_size);

    /*
     *  Note: 'half_size' must be >= 4 here. This is guaranteed by the
     *  caller: m_apm_multiply
     */

    result_bytes = 2 * half_size;   /* required size of result, in bytes */

    /* fill out the data so the number of bytes is an exact power of 2 */
    M_apm_pad(M_ain, result_bytes);
    M_apm_pad(M_bin, result_bytes);

    if (result_bytes > rr->m_apm_malloclength) {
        new_data = MAPM_REALLOC(rr->m_apm_data, (result_bytes + 32));

        if (new_data == NULL) {
            /* fatal, this does not return */
            M_apm_log_error_msg(M_APM_FATAL, "\'M_fast_multiply\', Out of memory");
        }

        rr->m_apm_malloclength = result_bytes + 28;
        rr->m_apm_data = (UCHAR *)new_data;
    }

#ifdef NO_FFT_MULTIPLY

    M_fmul_div_conq(rr->m_apm_data, M_ain->m_apm_data,
                    M_bin->m_apm_data, half_size);

#else

    /*
     *  If the numbers are *really* big, use the divide-and-conquer
     *  routine first until the numbers are small enough to be handled
     *  by the FFT algorithm. If the numbers are already small enough,
     *  call the FFT multiplication now.
     *
     *  With a 2 byte int (16 bit compilers) the FFT routine is always
     *  called directly.
     *
     *  Note that 'half_size' here is (and must be) an exact power of 2.
     */

    if (size_flag != 2 && half_size > (MAX_FFT_BYTES + 2)) {
        M_fmul_div_conq(rr->m_apm_data, M_ain->m_apm_data,
                        M_bin->m_apm_data, half_size);
    } else {
        M_fast_mul_fft(rr->m_apm_data, M_ain->m_apm_data,
                       M_bin->m_apm_data, half_size);
    }

#endif

    rr->m_apm_sign       = product_sign;
    rr->m_apm_exponent   = product_exp;
    rr->m_apm_datalength = 4 * half_size;

    M_apm_normalize(rr);
}
/*
 *  Multiply: r = a * b.
 *
 *  A zero operand (sign product of 0) gives a zero result immediately.
 *  When both operands occupy at least 48 data bytes the work is handed
 *  to M_fast_multiply. Otherwise a schoolbook multiplication is done
 *  directly on the data bytes, with every byte kept below 100 by
 *  propagating carries as the partial products are accumulated.
 *
 *  The byte product of two data bytes is split into its carry and
 *  remainder parts through the two lookup tables returned by
 *  M_get_div_rem_addr.
 *
 *  NOTE(review): the result buffer is cleared before the operands are
 *  read, so 'r' presumably must not alias 'a' or 'b' - confirm against
 *  callers.
 */
void m_apm_multiply(M_APM r, M_APM a, M_APM b)
{
    int    a_byte, product, result_sign, result_exp, result_bytes;
    int    ia, ib, bytes_a, bytes_b, column, total_digits;
    UCHAR *out, *cell, *div_table, *rem_table;
    void  *new_data;

    result_sign = a->m_apm_sign * b->m_apm_sign;
    result_exp  = a->m_apm_exponent + b->m_apm_exponent;

    if (result_sign == 0) {     /* one number is zero, result is zero */
        M_set_to_zero(r);
        return;
    }

    total_digits = a->m_apm_datalength + b->m_apm_datalength;
    bytes_a      = (a->m_apm_datalength + 1) >> 1;
    bytes_b      = (b->m_apm_datalength + 1) >> 1;

    /*
     *  If we are multiplying 2 'big' numbers, use the fast algorithm.
     *
     *  This is a **very** approximate break even point between this
     *  algorithm and the FFT multiply. Note that different CPUs,
     *  operating systems, and compilers may yield a different break
     *  even point. This point (~96 decimal digits) is how the test came
     *  out on the author's system.
     */
    if (bytes_a >= 48 && bytes_b >= 48) {
        M_fast_multiply(r, a, b);
        return;
    }

    result_bytes = (total_digits + 1) >> 1;  /* required size of result, in bytes */

    if (result_bytes > r->m_apm_malloclength) {
        new_data = MAPM_REALLOC(r->m_apm_data, (result_bytes + 32));

        if (new_data == NULL) {
            /* fatal, this does not return */
            M_apm_log_error_msg(M_APM_FATAL, "\'m_apm_multiply\', Out of memory");
        }

        r->m_apm_malloclength = result_bytes + 28;
        r->m_apm_data = (UCHAR *)new_data;
    }

    M_get_div_rem_addr(&div_table, &rem_table);

    column = bytes_a + bytes_b;
    out    = r->m_apm_data;
    memset(out, 0, column);

    /* walk 'a' from its last data byte back to its first */
    ia = bytes_a;

    do {
        column--;
        cell   = out + column;
        ib     = bytes_b;
        a_byte = (int)a->m_apm_data[--ia];

        /* accumulate a_byte * b into the result, last byte of 'b' first */
        do {
            product = a_byte * b->m_apm_data[--ib];

            cell[-1] += div_table[product];
            cell[0]  += rem_table[product];

            if (cell[0] >= 100) {
                cell[0]  -= 100;
                cell[-1] += 1;
            }

            cell--;

            /* the byte that just received the carry may itself overflow */
            if (cell[0] >= 100) {
                cell[0]  -= 100;
                cell[-1] += 1;
            }
        } while (ib != 0);
    } while (ia != 0);

    r->m_apm_sign       = result_sign;
    r->m_apm_exponent   = result_exp;
    r->m_apm_datalength = total_digits;

    M_apm_normalize(r);
}
/*
 *  Digit-by-digit division: r = a / b, following the step numbering
 *  (D1, D3, D4, D5) of Knuth's long division algorithm with base 100.
 *
 *  r       receives the quotient; its data buffer is grown with
 *          MAPM_REALLOC when it is too small for the requested result.
 *  places  controls how many digits are generated: 'iterations' is
 *          places + 1, plus the exponent difference of the scaled
 *          operands when that difference is positive.
 *  a, b    numerator and denominator. They are only read; all work is
 *          done on the work numbers M_div_worka / M_div_workb and the
 *          temporaries M_div_tmp7 / M_div_tmp8 / M_div_tmp9, which are
 *          allocated on the first call.
 *
 *  If either operand is zero the result is set to zero; when the
 *  denominator is the zero one, a "Divide by 0" message is also logged
 *  with M_APM_RETURN.
 *
 *  Each pass of the main loop produces one base-100 quotient byte
 *  (two decimal digits) and stops when enough digits were generated or
 *  when the remainder becomes exactly zero.
 */
void M_apm_sdivide(M_APM r, int places, M_APM a, M_APM b)
{
    int j, k, m, b0, sign, nexp, indexr, icompare, iterations;
    long trial_numer;
    void *vp;

    /* one-time allocation of the work numbers used by this function */
    if (M_div_firsttime)
    {
        M_div_firsttime = FALSE;
        M_div_worka = m_apm_init();
        M_div_workb = m_apm_init();
        M_div_tmp7 = m_apm_init();
        M_div_tmp8 = m_apm_init();
        M_div_tmp9 = m_apm_init();
    }

    sign = a->m_apm_sign * b->m_apm_sign;

    if (sign == 0) /* one number is zero, result is zero */
    {
        if (b->m_apm_sign == 0)
        {
            M_apm_log_error_msg(M_APM_RETURN, "\'M_apm_sdivide\', Divide by 0");
        }

        M_set_to_zero(r);
        return;
    }

    /*
     *  Knuth step D1. Since base = 100, base / 2 = 50.
     *  (also make the working copies positive)
     *
     *  If the leading byte of the denominator is already >= 50 no
     *  scaling is needed; otherwise both operands are multiplied by
     *  the same small integer k so that it becomes large enough.
     */
    if (b->m_apm_data[0] >= 50)
    {
        m_apm_absolute_value(M_div_worka, a);
        m_apm_absolute_value(M_div_workb, b);
    }
    else /* 'normal' step D1 */
    {
        k = 100 / (b->m_apm_data[0] + 1);
        m_apm_set_long(M_div_tmp9, (long)k);
        m_apm_multiply(M_div_worka, M_div_tmp9, a);
        m_apm_multiply(M_div_workb, M_div_tmp9, b);
        M_div_worka->m_apm_sign = 1;
        M_div_workb->m_apm_sign = 1;
    }

    /*
     *  setup trial denominator for step D3: the leading byte times 100,
     *  plus the second byte when the denominator has at least 3 digits
     */
    b0 = 100 * (int)M_div_workb->m_apm_data[0];

    if (M_div_workb->m_apm_datalength >= 3)
        b0 += M_div_workb->m_apm_data[1];

    /* exponent of the quotient, before the adjustments made below */
    nexp = M_div_worka->m_apm_exponent - M_div_workb->m_apm_exponent;

    if (nexp > 0)
        iterations = nexp + places + 1;
    else
        iterations = places + 1;

    k = (iterations + 1) >> 1; /* required size of result, in bytes */

    if (k > r->m_apm_malloclength)
    {
        if ((vp = MAPM_REALLOC(r->m_apm_data, (k + 32))) == NULL)
        {
            /* fatal, this does not return */
            M_apm_log_error_msg(M_APM_FATAL, "\'M_apm_sdivide\', Out of memory");
        }

        r->m_apm_malloclength = k + 28;
        r->m_apm_data = (UCHAR *)vp;
    }

    /* clear the exponent in the working copies */
    M_div_worka->m_apm_exponent = 0;
    M_div_workb->m_apm_exponent = 0;

    /* if numbers are equal, ratio == 1.00000... */
    if ((icompare = m_apm_compare(M_div_worka, M_div_workb)) == 0)
    {
        /* a single digit '1' (stored as byte value 10) is the whole result */
        iterations = 1;
        r->m_apm_data[0] = 10;
        nexp++;
    }
    else /* ratio not 1, do the real division */
    {
        if (icompare == 1) /* numerator > denominator */
        {
            nexp++; /* to adjust the final exponent */
            M_div_worka->m_apm_exponent += 1; /* multiply numerator by 10 */
        }
        else /* numerator < denominator */
        {
            M_div_worka->m_apm_exponent += 2; /* multiply numerator by 100 */
        }

        indexr = 0; /* next byte of r->m_apm_data to be written */
        m = 0;      /* number of quotient digits generated so far */

        while (TRUE)
        {
            /*
             *  Knuth step D3. Only use the 3rd -> 6th digits if the number
             *  actually has that many digits.
             */
            trial_numer = 10000L * (long)M_div_worka->m_apm_data[0];

            if (M_div_worka->m_apm_datalength >= 5)
            {
                trial_numer += 100 * M_div_worka->m_apm_data[1] + M_div_worka->m_apm_data[2];
            }
            else
            {
                if (M_div_worka->m_apm_datalength >= 3)
                    trial_numer += 100 * M_div_worka->m_apm_data[1];
            }

            j = (int)(trial_numer / b0);

            /*
             *  Since the library 'normalizes' all the results, we need
             *  to look at the exponent of the number to decide if we
             *  have a lead in 0n or 00.
             *
             *  The trial quotient is divided by 10 once for every
             *  position the numerator exponent falls short of 2.
             */
            if ((k = 2 - M_div_worka->m_apm_exponent) > 0)
            {
                while (TRUE)
                {
                    j /= 10;
                    if (--k == 0)
                        break;
                }
            }

            if (j == 100) /* qhat == base ?? */
                j = 99; /* if so, decrease by 1 */

            /* M_div_tmp7 = qhat * denominator */
            m_apm_set_long(M_div_tmp8, (long)j);
            m_apm_multiply(M_div_tmp7, M_div_tmp8, M_div_workb);

            /*
             *  Compare our q-hat (j) against the desired number.
             *  j is either correct, 1 too large, or 2 too large
             *  per Theorem B on pg 272 of Art of Computer Programming,
             *  Volume 2, 3rd Edition.
             *
             *  The above statement is only true if using the 2 leading
             *  digits of the numerator and the leading digit of the
             *  denominator. Since we are using the (3) leading digits
             *  of the numerator and the (2) leading digits of the
             *  denominator, we eliminate the case where our q-hat is
             *  2 too large, (and q-hat being 1 too large is quite remote).
             */
            if (m_apm_compare(M_div_tmp7, M_div_worka) == 1)
            {
                /* q-hat was 1 too large: lower it and take one denominator back out */
                j--;
                m_apm_subtract(M_div_tmp8, M_div_tmp7, M_div_workb);
                m_apm_copy(M_div_tmp7, M_div_tmp8);
            }

            /*
             *  Since we know q-hat is correct, step D6 is unnecessary.
             *
             *  Store q-hat, step D5. Since D6 is unnecessary, we can
             *  do D5 before D4 and decide if we are done.
             */
            r->m_apm_data[indexr++] = (UCHAR)j; /* j == 'qhat' */
            m += 2; /* one stored byte accounts for two digits */

            if (m >= iterations)
                break;

            /* step D4: remainder = numerator - qhat * denominator */
            m_apm_subtract(M_div_tmp9, M_div_worka, M_div_tmp7);

            /*
             *  if the subtraction yields zero, the division is exact
             *  and we are done early.
             */
            if (M_div_tmp9->m_apm_sign == 0)
            {
                iterations = m;
                break;
            }

            /* multiply by 100 and re-save */
            M_div_tmp9->m_apm_exponent += 2;
            m_apm_copy(M_div_worka, M_div_tmp9);
        }
    }

    /* 'iterations' now holds the number of digits present in the result */
    r->m_apm_sign = sign;
    r->m_apm_exponent = nexp;
    r->m_apm_datalength = iterations;

    M_apm_normalize(r);
}