/* Apply a scalar to every element of `matrix`, writing the result to `dest`.
 *
 * mul != 0 selects fix16_mul(element, scalar); mul == 0 selects
 * fix16_div(element, scalar).  dest takes over the dimensions and error flags
 * of matrix, and FIXMATRIX_OVERFLOW is added to dest->errors whenever an
 * element result equals fix16_overflow.
 */
static void mf16_divmul_s(mf16 *dest, const mf16 *matrix, fix16_t scalar, uint8_t mul)
{
    int r, c;

    dest->rows = matrix->rows;
    dest->columns = matrix->columns;
    dest->errors = matrix->errors;

    for (r = 0; r < dest->rows; r++)
    {
        for (c = 0; c < dest->columns; c++)
        {
            const fix16_t in = matrix->data[r][c];
            const fix16_t out = mul ? fix16_mul(in, scalar)
                                    : fix16_div(in, scalar);

            if (out == fix16_overflow)
                dest->errors |= FIXMATRIX_OVERFLOW;

            dest->data[r][c] = out;
        }
    }
}
/* Store in `result` the vector v0 scaled by the reciprocal of its length.
 * v0 itself is not modified. */
void fix16_vector3_normalized(const fix16_vector3_t *v0, fix16_vector3_t *result)
{
    const fix16_t length = fix16_vector3_length(v0);
    const fix16_t reciprocal = fix16_div(F16(1.0f), length);

    fix16_vector3_scaled(reciprocal, v0, result);
}
/* Scale `result` in place by the reciprocal of its own length. */
void fix16_vector3_normalize(fix16_vector3_t *result)
{
    const fix16_t length = fix16_vector3_length(result);
    const fix16_t reciprocal = fix16_div(F16(1.0f), length);

    fix16_vector3_scale(reciprocal, result);
}
fix16_t fix16_from_str(const char *buf) { while (isspace(*buf)) buf++; /* Decode the sign */ bool negative = (*buf == '-'); if (*buf == '+' || *buf == '-') buf++; /* Decode the integer part */ uint32_t intpart = 0; int count = 0; while (isdigit(*buf)) { intpart *= 10; intpart += *buf++ - '0'; count++; } if (count == 0 || count > 5 || intpart > 32768 || (!negative && intpart > 32767)) return fix16_overflow; fix16_t value = intpart << 16; /* Decode the decimal part */ if (*buf == '.' || *buf == ',') { buf++; uint32_t fracpart = 0; uint32_t scale = 1; while (isdigit(*buf) && scale < 100000) { scale *= 10; fracpart *= 10; fracpart += *buf++ - '0'; } value += fix16_div(fracpart, scale); } /* Verify that there is no garbage left over */ while (*buf != '\0') { if (!isdigit(*buf) && !isspace(*buf)) return fix16_overflow; buf++; } return negative ? -value : value; }
/* Solve a linear system from its QR factors, writing the solution to `dest`.
 *
 * Ax=b <=> QRx=b <=> Q'QRx=Q'b <=> Rx=Q'b
 * Q'b is produced by mf16_mul_at(dest, q, matrix); x is then obtained one
 * column at a time by substituting from the last row upwards.
 *
 * Error flags set on dest->errors:
 *   FIXMATRIX_USEERR   - r is not square, r and q disagree on column count,
 *                        or r aliases dest (nothing else is touched).
 *   FIXMATRIX_OVERFLOW - a product, difference or quotient was fix16_overflow.
 *   FIXMATRIX_SINGULAR - a diagonal entry of r is zero; that unknown is set
 *                        to 0.
 */
void mf16_solve(mf16 *dest, const mf16 *q, const mf16 *r, const mf16 *matrix)
{
    int col, i, k;

    const int usable = (r->columns == r->rows)
                    && (r->columns == q->columns)
                    && (r != dest);
    if (!usable)
    {
        dest->errors |= FIXMATRIX_USEERR;
        return;
    }

    mf16_mul_at(dest, q, matrix);

    for (col = 0; col < dest->columns; col++)
    {
        for (i = dest->rows - 1; i >= 0; i--)
        {
            fix16_t rhs = dest->data[i][col];

            // Remove the contribution of the unknowns solved so far
            for (k = i + 1; k < r->columns; k++)
            {
                const fix16_t term = fix16_mul(r->data[i][k], dest->data[k][col]);
                rhs = fix16_sub(rhs, term);

                if (term == fix16_overflow || rhs == fix16_overflow)
                    dest->errors |= FIXMATRIX_OVERFLOW;
            }

            // What remains is R_ii * x_i, so x_i = rhs / R_ii
            const fix16_t pivot = r->data[i][i];
            if (pivot == 0)
            {
                dest->errors |= FIXMATRIX_SINGULAR;
                dest->data[i][col] = 0;
            }
            else
            {
                const fix16_t x = fix16_div(rhs, pivot);
                dest->data[i][col] = x;

                if (x == fix16_overflow)
                    dest->errors |= FIXMATRIX_OVERFLOW;
            }
        }
    }
}
/* Return fix16_acos( dot(v0, v1) / (length(v0) * length(v1)) ). */
fix16_t fix16_vector3_angle(const fix16_vector3_t *v0, const fix16_vector3_t *v1)
{
    const fix16_t len0 = fix16_vector3_length(v0);
    const fix16_t len1 = fix16_vector3_length(v1);
    const fix16_t dot = fix16_vector3_dot(v0, v1);
    const fix16_t cosine = fix16_div(dot, fix16_mul(len0, len1));

    return fix16_acos(cosine);
}
/* A basic single-frequency DFT, useful when you are interested in just a
 * single signal.
 *
 * Script signature:
 *   dft(input{}, Fixed: &real, Fixed: &imag, Fixed: period, count);
 *
 * params[1] is the sample buffer (read as bytes through INPUT_INDEX),
 * params[2] / params[3] receive the real and imaginary parts, params[4] is
 * the period as a fix16 value and params[5] the number of samples.
 * Always returns 0.  A period of 0 yields real = imag = 0.
 */
static cell AMX_NATIVE_CALL amx_dft(AMX *amx, const cell *params)
{
    uint8_t *input = (uint8_t*)params[1];
    int count = params[5];
    fix16_t period = params[4];
    fix16_t *realp = (fix16_t*)params[2];
    fix16_t *imagp = (fix16_t*)params[3];

    // The period comes straight from the script. A zero period would be an
    // integer division by zero below, so report an empty result instead.
    if (period == 0)
    {
        *realp = 0;
        *imagp = 0;
        return 0;
    }

    // Round the count to a multiple of period
    // NOTE(review): if fewer samples than one period are supplied, count
    // becomes 0 and the final fix16_div calls get a zero divisor - confirm
    // that fix16_div's result for that case is acceptable to callers.
    int multiple = fix16_from_int(count) / period;
    count = fix16_to_int(fix16_mul(fix16_from_int(multiple), period));

    fix16_t real = 0;
    fix16_t imag = 0;
    fix16_t step = fix16_div(2 * fix16_pi, period);
    fix16_t angle = 0;

    for (int i = 0; i < count; i++)
    {
        // We scale by 256 to achieve a good compromise between precision and
        // range.
        fix16_t value = input[INPUT_INDEX(i)] * 256;

        // Calculate value * (cos(angle) - i * sin(angle)) and add to sum.
        real += fix16_mul(value, fix16_cos(angle));
        imag += fix16_mul(value, -fix16_sin(angle));

        angle += step;
    }

    // Undo the per-sample scaling and average over the sample count.
    fix16_t scale = count * 256;
    *realp = fix16_div(real, scale);
    *imagp = fix16_div(imag, scale);

    return 0;
}
/// Divide this value in place by a float, converted with fix16_from_float.
Fix16 & operator/=(const float rhs)
{
    this->value = fix16_div(this->value, fix16_from_float(rhs));
    return *this;
}
/// Divide this value in place by a double, converted with fix16_from_dbl.
Fix16 & operator/=(const double rhs)
{
    this->value = fix16_div(this->value, fix16_from_dbl(rhs));
    return *this;
}
/// Divide this value in place by a raw fix16_t divisor.
Fix16 & operator/=(const fix16_t rhs)
{
    const fix16_t quotient = fix16_div(value, rhs);
    value = quotient;
    return *this;
}
/// Divide this value in place by another Fix16.
Fix16 & operator/=(const Fix16 &rhs)
{
    this->value = fix16_div(this->value, rhs.value);
    return *this;
}
/* Cholesky decomposition (Cholesky-Banachiewicz ordering): fills `dest` with
 * the lower-triangular factor L of `matrix`, working row by row.
 * Refer to http://en.wikipedia.org/wiki/Cholesky_decomposition#The_Cholesky.E2.80.93Banachiewicz_and_Cholesky.E2.80.93Crout_algorithms
 *
 * dest starts with matrix's error flags and becomes rows x rows in size.
 * Flags that may be added to dest->errors:
 *   FIXMATRIX_DIMERR   - matrix is not square (the computation still runs).
 *   FIXMATRIX_OVERFLOW - an intermediate result was fix16_overflow.
 *   FIXMATRIX_NEGATIVE - a diagonal radicand fell below the raw value -65.
 */
void mf16_cholesky(mf16 *dest, const mf16 *matrix)
{
    int i, j, k;

    dest->errors = matrix->errors;

    if (matrix->rows != matrix->columns)
        dest->errors |= FIXMATRIX_DIMERR;

    dest->rows = dest->columns = matrix->rows;

    for (i = 0; i < dest->rows; i++)
    {
        // Entries left of the diagonal:
        // Lij = 1/Ljj (Aij - sum(Lik Ljk, k = 1..(j-1)))
        for (j = 0; j < i; j++)
        {
            fix16_t acc = matrix->data[i][j];

            for (k = 0; k < j; k++)
            {
                const fix16_t product = fix16_mul(dest->data[i][k], dest->data[j][k]);
                acc = fix16_sub(acc, product);

                if (acc == fix16_overflow || product == fix16_overflow)
                    dest->errors |= FIXMATRIX_OVERFLOW;
            }

            acc = fix16_div(acc, dest->data[j][j]);
            dest->data[i][j] = acc;

            if (acc == fix16_overflow)
                dest->errors |= FIXMATRIX_OVERFLOW;
        }

        // The diagonal entry:
        // Lii = sqrt(Aii - sum(Lik^2, k = 1..(i-1)))
        {
            fix16_t radicand = matrix->data[i][i];

            for (k = 0; k < i; k++)
            {
                const fix16_t Lik = dest->data[i][k];
                const fix16_t square = fix16_mul(Lik, Lik);
                radicand = fix16_sub(radicand, square);

                if (radicand == fix16_overflow || square == fix16_overflow)
                    dest->errors |= FIXMATRIX_OVERFLOW;
            }

            if (radicand < 0)
            {
                // Slightly negative radicands are clamped silently; anything
                // below -65 (raw) is reported before clamping.
                if (radicand < -65)
                    dest->errors |= FIXMATRIX_NEGATIVE;

                radicand = 0;
            }

            dest->data[i][i] = fix16_sqrt(radicand);
        }

        // Everything right of the diagonal is zero
        for (j = i + 1; j < dest->columns; j++)
            dest->data[i][j] = 0;
    }
}
/* QR decomposition of `matrix` by modified Gram-Schmidt.
 *
 * q starts as a copy of matrix (unless it already is matrix) and its columns
 * are orthogonalised and normalised in place; r becomes a zero-filled
 * columns x columns matrix that collects the projection coefficients and the
 * column norms.  `reorthogonalize` is the number of extra orthogonalisation
 * passes run per column in addition to the first one.
 *
 * FIXMATRIX_OVERFLOW and FIXMATRIX_SINGULAR are accumulated on q->errors and
 * copied to r->errors at the end.
 */
void mf16_qr_decomposition(mf16 *q, mf16 *r, const mf16 *matrix, int reorthogonalize)
{
    // Columns are walked through a pointer to their first element with this
    // stride between consecutive rows.
    const uint8_t stride = FIXMATRIX_MAX_SIZE;
    const uint8_t n = matrix->rows;
    int col, prev, pass, i;

    // Work on q directly; copy the input only when it is a different object
    if (q != matrix)
    {
        *q = *matrix;
    }

    // r is square with the column count of the input, and starts out zeroed
    r->columns = matrix->columns;
    r->rows = matrix->columns;
    r->errors = 0;
    mf16_fill(r, 0);

    for (col = 0; col < q->columns; col++)
    {
        fix16_t *v = &q->data[0][col];

        // Remove the components along all earlier columns. subtract_projection
        // relies on those columns having been normalised already.
        for (pass = 0; pass <= reorthogonalize; pass++)
        {
            for (prev = 0; prev < col; prev++)
            {
                fix16_t *u = &q->data[0][prev];
                const fix16_t dot = fa16_dot(v, stride, u, stride, n);

                subtract_projection(v, u, dot, n, &q->errors);

                if (dot == fix16_overflow)
                    q->errors |= FIXMATRIX_OVERFLOW;

                r->data[prev][col] += dot;
            }
        }

        // Length of what is left of this column goes on r's diagonal
        const fix16_t norm = fa16_norm(v, stride, n);
        r->data[col][col] = norm;

        if (norm == fix16_overflow)
            q->errors |= FIXMATRIX_OVERFLOW;

        if (norm >= 5 || norm <= -5)
        {
            // norm >= |v[i]| for every i, so these divisions cannot overflow
            // unless norm is close to zero, which this branch excludes.
            for (i = 0; i < n; i++)
            {
                q->data[i][col] = fix16_div(q->data[i][col], norm);
            }
        }
        else
        {
            // A (nearly) zero norm means the column was linearly dependent
            // on the earlier ones; leave it unnormalised.
            q->errors |= FIXMATRIX_SINGULAR;
        }
    }

    r->errors = q->errors;
}
/* Benchmark / self-check driver for the fix16 primitives.
 *
 * For each entry of testcases1 it runs fix16_sqrt and fix16_exp, and for each
 * entry of testcases2 it runs fix16_add, fix16_sub, fix16_mul and (when b is
 * non-zero) fix16_div.  Every call is wrapped in MEASURE and its result is
 * compared against the expected value stored in the test table; mismatches
 * are reported through print_value.  Unless NO_FLOAT is defined, the same
 * operations are then measured on float for comparison.
 *
 * NOTE(review): MEASURE and PRINT are defined elsewhere; presumably MEASURE
 * accumulates timing for the named counter and PRINT reports it - confirm.
 *
 * Always returns 0, including when comparisons failed.
 */
int main()
{
    int i;
    interface_init();

    /* Back-to-back start/end reports as the "Timestamp bias" value. */
    start_timing();
    print_value("Timestamp bias", end_timing());

    /* Single-operand functions: sqrt and exp. */
    for (i = 0; i < TESTCASES1_COUNT; i++)
    {
        fix16_t input = testcases1[i].a;
        fix16_t result;
        fix16_t expected = testcases1[i].sqrt;
        MEASURE(sqrt_cycles, result = fix16_sqrt(input));

        /* The sqrt result is only checked for strictly positive inputs. */
        if (input > 0 && delta(result, expected) > max_delta)
        {
            print_value("Failed SQRT, i", i);
            print_value("Failed SQRT, input", input);
            print_value("Failed SQRT, output", result);
            print_value("Failed SQRT, expected", expected);
        }

        expected = testcases1[i].exp;
        MEASURE(exp_cycles, result = fix16_exp(input));

        /* exp uses its own fixed tolerance of 400 instead of max_delta. */
        if (delta(result, expected) > 400)
        {
            print_value("Failed EXP, i", i);
            print_value("Failed EXP, input", input);
            print_value("Failed EXP, output", result);
            print_value("Failed EXP, expected", expected);
        }
    }
    PRINT(sqrt_cycles, "fix16_sqrt");
    PRINT(exp_cycles, "fix16_exp");

    /* Two-operand functions: add, sub, mul, div. */
    for (i = 0; i < TESTCASES2_COUNT; i++)
    {
        fix16_t a = testcases2[i].a;
        fix16_t b = testcases2[i].b;
        volatile fix16_t result;

        fix16_t expected = testcases2[i].add;
        MEASURE(add_cycles, result = fix16_add(a, b));
        if (delta(result, expected) > max_delta)
        {
            print_value("Failed ADD, i", i);
            print_value("Failed ADD, a", a);
            print_value("Failed ADD, b", b);
            print_value("Failed ADD, output", result);
            print_value("Failed ADD, expected", expected);
        }

        expected = testcases2[i].sub;
        MEASURE(sub_cycles, result = fix16_sub(a, b));
        if (delta(result, expected) > max_delta)
        {
            print_value("Failed SUB, i", i);
            print_value("Failed SUB, a", a);
            print_value("Failed SUB, b", b);
            print_value("Failed SUB, output", result);
            print_value("Failed SUB, expected", expected);
        }

        expected = testcases2[i].mul;
        MEASURE(mul_cycles, result = fix16_mul(a, b));
        if (delta(result, expected) > max_delta)
        {
            print_value("Failed MUL, i", i);
            print_value("Failed MUL, a", a);
            print_value("Failed MUL, b", b);
            print_value("Failed MUL, output", result);
            print_value("Failed MUL, expected", expected);
        }

        /* Division is skipped entirely when the divisor is zero. */
        if (b != 0)
        {
            expected = testcases2[i].div;
            MEASURE(div_cycles, result = fix16_div(a, b));
            if (delta(result, expected) > max_delta)
            {
                print_value("Failed DIV, i", i);
                print_value("Failed DIV, a", a);
                print_value("Failed DIV, b", b);
                print_value("Failed DIV, output", result);
                print_value("Failed DIV, expected", expected);
            }
        }
    }
    PRINT(add_cycles, "fix16_add");
    PRINT(sub_cycles, "fix16_sub");
    PRINT(mul_cycles, "fix16_mul");
    PRINT(div_cycles, "fix16_div");

    /* Compare with floating point performance */
#ifndef NO_FLOAT
    for (i = 0; i < TESTCASES1_COUNT; i++)
    {
        float input = fix16_to_float(testcases1[i].a);
        /* Results are only measured here, never checked. */
        volatile float result;
        MEASURE(float_sqrtf_cycles, result = sqrtf(input));
    }
    PRINT(float_sqrtf_cycles, "float sqrtf");

    for (i = 0; i < TESTCASES2_COUNT; i++)
    {
        float a = fix16_to_float(testcases2[i].a);
        float b = fix16_to_float(testcases2[i].b);
        volatile float result;
        MEASURE(float_add_cycles, result = a + b);
        MEASURE(float_sub_cycles, result = a - b);
        MEASURE(float_mul_cycles, result = a * b);

        /* Same zero-divisor skip as the fixed-point loop. */
        if (b != 0)
        {
            MEASURE(float_div_cycles, result = a / b);
        }
    }
    PRINT(float_add_cycles, "float add");
    PRINT(float_sub_cycles, "float sub");
    PRINT(float_mul_cycles, "float mul");
    PRINT(float_div_cycles, "float div");
#endif

    return 0;
}