/*
 * Populate the ITER_END_NODES and ITER_START_NODES lists of every node
 * that follows g->source in the graph's `next` chain.
 *
 * MARKED_1 is used as a "visited" flag: the source is marked first and
 * each node is marked once its predecessors have been examined.
 *
 *  - For every predecessor of a node that is still unmarked when the
 *    node is reached, each of that predecessor's successors whose name
 *    differs from the node's and whose ORDER is greater than the
 *    predecessor's is appended to ITER_END_NODES(node).
 *  - For every already-marked successor of a node, that successor is
 *    appended to ITER_START_NODES of each still-unmarked successor of
 *    the same node.
 *
 * g->source is dereferenced unconditionally, so it must not be NULL.
 */
void add_iteration_lists(Graph g)
{
    Node cur;

    MARKED_1(g->source) = TRUE;

    for (cur = g->source->next; cur != NULL; cur = cur->next) {
        int p, s;

        /* Pass 1: predecessors that have not been visited yet. */
        for (p = 0; p < ListSize(cur->predecessors); p++) {
            Node pred = (Node) ListIndex(cur->predecessors, p);
            int q;

            if (MARKED_1(pred) != FALSE)
                continue;

            for (q = 0; q < ListSize(pred->successors); q++) {
                Node sibling = (Node) ListIndex(pred->successors, q);

                if (strcmp(sibling->name, cur->name) == 0)
                    continue;           /* skip the node itself (by name) */
                if (ORDER(sibling) <= ORDER(pred))
                    continue;           /* only successors ordered after pred */

                ListPut(ITER_END_NODES(cur), sibling);
            }
        }

        MARKED_1(cur) = TRUE;

        /* Pass 2: pair visited successors with unvisited ones. */
        for (s = 0; s < ListSize(cur->successors); s++) {
            Node visited = (Node) ListIndex(cur->successors, s);
            int u;

            if (MARKED_1(visited) != TRUE)
                continue;

            for (u = 0; u < ListSize(cur->successors); u++) {
                Node pending = (Node) ListIndex(cur->successors, u);

                if (MARKED_1(pending) == FALSE)
                    ListPut(ITER_START_NODES(pending), visited);
            }
        }
    }
}
/*
 * Walk backwards from n through its predecessors.  For every
 * predecessor whose type is SELECTION, call
 * mark_successors(child, ACT_NONE) on each of that predecessor's
 * successors whose name differs from n's.  Recurse into a predecessor
 * only when ORDER(n) > ORDER(predecessor).
 *
 * MARKED_3 guards against visiting a node twice; nodes with a NULL
 * predecessor list are ignored.
 */
void handle_selection(Node n)
{
    int p;

    if (n->predecessors == NULL)
        return;
    if (MARKED_3(n) == TRUE)
        return;

    MARKED_3(n) = TRUE;

    p = 0;
    while (p < ListSize(n->predecessors)) {
        Node pred = (Node) ListIndex(n->predecessors, p);

        if (pred->type == SELECTION) {
            int s = 0;

            while (s < ListSize(pred->successors)) {
                Node alt = (Node) ListIndex(pred->successors, s);

                /* Every branch other than n (compared by name). */
                if (strcmp(alt->name, n->name) != 0)
                    mark_successors(alt, ACT_NONE);
                s++;
            }
        }

        if (ORDER(n) > ORDER(pred))
            handle_selection(pred);

        p++;
    }
}
void check () { mpfi_t interval; long a, b, c; int cmp; int i; i = 1; a = LONG_MAX; while (a >>= 1) i++; mpfi_init2 (interval, i); for (i = 0; i <= 1000; ++i) { /* random numbers a < b < c */ a = random_si (); b = random_si (); c = random_si (); ORDER (a, b); ORDER (a, c); ORDER (b, c); mpfi_interv_si (interval, b, c); cmp = mpfi_cmp_si (interval, a); if (cmp < 0 || (cmp == 0 && a != b)) { print_error (a, interval); } mpfr_set_nan (&(interval->right)); if (mpfi_cmp_si (interval, a) != 1) { print_error (a, interval); } mpfi_interv_si (interval, a, c); if (mpfi_cmp_si (interval, b) != 0) { print_error (b, interval); } mpfr_set_nan (&(interval->right)); if (mpfi_cmp_si (interval, b) != 1) { print_error (b, interval); } mpfi_interv_si (interval, a, b); cmp = mpfi_cmp_si (interval, c); if (cmp > 0 || (cmp == 0 && c != b)) { print_error (c, interval); } mpfr_set_nan (&(interval->right)); if (mpfi_cmp_si (interval, c) != 1) { print_error (c, interval); } } mpfi_clear (interval); }
/* update ringlist-tree */ void update_tree(int i, int j) { baum *rli, *rlj, *tempb; if ( abs(i) < GSV.len) { /* >> single basepair move */ if ((i > 0) && (j > 0)) { /* insert */ rli = &rl[i-1]; rlj = &rl[j-1]; close_bp_en(rli, rlj); } else if ((i < 0)&&(j < 0)) { /* delete */ i = -i; rli = &rl[i-1]; open_bp_en(rli); } else { /* shift */ if (i > 0) { /* i remains the same, j shifts */ j=-j; rli=&rl[i-1]; rlj=&rl[j-1]; open_bp_en(rli); ORDER(rli, rlj); close_bp_en(rli, rlj); } else { /* j remains the same, i shifts */ baum *old_rli; i = -i; rli = &rl[i-1]; rlj = &rl[j-1]; old_rli = rlj->up; open_bp_en(old_rli); ORDER(rli, rlj); close_bp_en(rli, rlj); } } } /* << single basepair move */ else { /* >> double basepair move */ if ((i > 0) && (j > 0)) { /* insert */ rli = &rl[i-GSV.len-2]; rlj = &rl[j-GSV.len-2]; close_bp_en(rli->next, rlj->prev); close_bp_en(rli, rlj); } else if ((i < 0)&&(j < 0)) { /* delete */ i = -i; rli = &rl[i-GSV.len-2]; open_bp_en(rli); open_bp_en(rli->next); } } /* << double basepair move */ }
/*
 * Vector accumulate.
 *
 * Validates the descriptor array and target process, then performs the
 * accumulate either directly (armci_acc_vector) when SAMECLUSNODE(proc)
 * is true, or through armci_pack_vector after DO_FENCE(proc, SERVER_PUT).
 *
 * Returns 0 on success, or one of:
 *   FAIL   len < 1
 *   FAIL2  a descriptor has a NULL src_ptr_array or dst_ptr_array
 *   FAIL3  a descriptor has bytes < 1
 *   FAIL4  a descriptor has ptr_array_len < 1
 *   FAIL5  proc is outside [0, armci_nproc)
 *   FAIL6  the underlying transfer returned non-zero
 */
int PARMCI_AccV(int op,               /* operation code */
                void *scale,          /* scaling factor for accumulate */
                armci_giov_t darr[],  /* descriptor array */
                int len,              /* length of descriptor array */
                int proc              /* remote process(or) ID */
               )
{
    int rc = 0, i, direct = 0;

    if (len < 1)
        return FAIL;

    for (i = 0; i < len; i++) {
        if (darr[i].src_ptr_array == NULL)
            return FAIL2;
        if (darr[i].dst_ptr_array == NULL)
            return FAIL2;
        if (darr[i].bytes < 1)
            return FAIL3;
        if (darr[i].ptr_array_len < 1)
            return FAIL4;
    }

    if (proc < 0 || proc >= armci_nproc)
        return FAIL5;

    ORDER(op, proc); /* ensure ordering */
    direct = SAMECLUSNODE(proc);

    /* This configuration is deliberately rejected at compile time. */
#   if defined(ACC_COPY) && !defined(ACC_SMP)
    if (armci_me != proc) direct = 0;
#   error "grrr"
#   endif

    if (direct) {
        rc = armci_acc_vector(op, scale, darr, len, proc);
    } else {
        DO_FENCE(proc, SERVER_PUT);
        rc = armci_pack_vector(op, scale, darr, len, proc, NULL);
    }

    if (rc)
        return FAIL6;
    return 0;
}
/*
 * Vector get.
 *
 * Validates the descriptor array and target process, then copies the
 * data either directly (armci_copy_vector) or via armci_pack_vector.
 * `direct` starts at 1 and is replaced by SAMECLUSNODE(proc) unless
 * QUADRICS is defined; on the direct path a DIRECT_GET fence is issued
 * only when the target is not on the same cluster node.
 *
 * Returns 0 on success, or one of:
 *   FAIL   len < 1
 *   FAIL2  a descriptor has a NULL src_ptr_array or dst_ptr_array
 *   FAIL3  a descriptor has bytes < 1
 *   FAIL4  a descriptor has ptr_array_len < 1
 *   FAIL5  proc is outside [0, armci_nproc)
 *   FAIL6  the underlying transfer returned non-zero
 */
int PARMCI_GetV(armci_giov_t darr[],  /* descriptor array */
                int len,              /* length of descriptor array */
                int proc              /* remote process(or) ID */
               )
{
    int rc = 0, i, direct = 1;

    if (len < 1)
        return FAIL;

    for (i = 0; i < len; i++) {
        if (darr[i].src_ptr_array == NULL)
            return FAIL2;
        if (darr[i].dst_ptr_array == NULL)
            return FAIL2;
        if (darr[i].bytes < 1)
            return FAIL3;
        if (darr[i].ptr_array_len < 1)
            return FAIL4;
    }

    if (proc < 0 || proc >= armci_nproc)
        return FAIL5;

    ORDER(GET, proc); /* ensure ordering */

#ifndef QUADRICS
    direct = SAMECLUSNODE(proc);
#endif

    if (direct) {
        if (!SAMECLUSNODE(proc))
            DO_FENCE(proc, DIRECT_GET);
        rc = armci_copy_vector(GET, darr, len, proc);
    } else {
        DO_FENCE(proc, SERVER_GET);
        rc = armci_pack_vector(GET, NULL, darr, len, proc, NULL);
    }

    if (rc)
        return FAIL6;
    return 0;
}
//=========================================================================== // PIT_CheckLine // Adjusts tmfloorz and tmceilingz as lines are contacted. //=========================================================================== static boolean PIT_CheckLine(line_t *ld, void *parm) { checkpos_data_t *tm = parm; fixed_t bbox[4]; // Setup the bounding box for the line. ORDER(ld->v1->x, ld->v2->x, bbox[BOXLEFT], bbox[BOXRIGHT]); ORDER(ld->v1->y, ld->v2->y, bbox[BOXBOTTOM], bbox[BOXTOP]); if(tm->box[BOXRIGHT] <= bbox[BOXLEFT] || tm->box[BOXLEFT] >= bbox[BOXRIGHT] || tm->box[BOXTOP] <= bbox[BOXBOTTOM] || tm->box[BOXBOTTOM] >= bbox[BOXTOP]) return true; if(P_BoxOnLineSide(tm->box, ld) != -1) return true; // A line has been hit. tm->thing->wallhit = true; if(!ld->backsector) return false; // One sided line, can't go through. if(!(tm->thing->ddflags & DDMF_MISSILE)) { if(ld->flags & ML_BLOCKING) return false; // explicitly blocking everything } // set openrange, opentop, openbottom. P_LineOpening(ld); // adjust floor / ceiling heights. if(opentop < tm->ceilingz) tm->ceilingz = opentop; if(openbottom > tm->floorz) tm->floorz = openbottom; if(lowfloor < tm->dropoffz) tm->dropoffz = lowfloor; tm->thing->wallhit = false; return true; }
bool OSOrderedSet::setObject(const OSMetaClassBase *anObject ) { unsigned int i; // queue it behind those with same priority for( i = 0; (i < count) && (ORDER(array[i].obj, anObject) >= 0); i++ ) {} return( setObject(i, anObject)); }
// draw a triangle void ArtDisplayDevice::triangle(const float *a, const float *b, const float *c, const float *n1, const float *n2, const float *n3) { float vec1[3], vec2[3], vec3[3]; float nor1[3], nor2[3], nor3[3]; // transform the world coordinates (transMat.top()).multpoint3d(a, vec1); (transMat.top()).multpoint3d(b, vec2); (transMat.top()).multpoint3d(c, vec3); // and the normals (transMat.top()).multnorm3d(n1, nor1); (transMat.top()).multnorm3d(n2, nor2); (transMat.top()).multnorm3d(n3, nor3); // draw the triangle fprintf(outfile, "polygon {\n"); fprintf(outfile, "colour %f,%f,%f\n", matData[colorIndex][0], matData[colorIndex][1], matData[colorIndex][2]); fprintf(outfile, "vertex (%f,%f,%f),(%f,%f,%f)\n", ORDER(vec1[0], vec1[1], vec1[2]), // point one ORDER(nor1[0], nor1[1], nor1[2])); fprintf(outfile, "vertex (%f,%f,%f),(%f,%f,%f)\n", ORDER(vec2[0], vec2[1], vec2[2]), // point two ORDER(nor2[0], nor2[1], nor2[2])); fprintf(outfile, "vertex (%f,%f,%f),(%f,%f,%f)\n", ORDER(vec3[0], vec3[1], vec3[2]), // point three ORDER(nor3[0], nor3[1], nor3[2])); fprintf(outfile, "}\n"); }
// draw a cone void ArtDisplayDevice::cone(float *a, float *b, float r) { float vec1[3], vec2[3]; // transform the world coordinates (transMat.top()).multpoint3d(a, vec1); (transMat.top()).multpoint3d(b, vec2); fprintf(outfile, "cone {\n"); fprintf(outfile, "colour %f,%f,%f\n", matData[colorIndex][0], matData[colorIndex][1], matData[colorIndex][2]); // second point fprintf(outfile, "vertex(%f,%f,%f)\n", ORDER(vec2[0], vec2[1], vec2[2])); // first point fprintf(outfile, "center(%f,%f,%f)\n", ORDER(vec1[0], vec1[1], vec1[2])); // radius fprintf(outfile, "radius %f\n}\n", scale_radius(r)); }
// draw a point void ArtDisplayDevice::point(float * spdata) { float vec[3]; // transform the world coordinates (transMat.top()).multpoint3d(spdata, vec); // draw the sphere fprintf(outfile, "sphere {\ncolour %f,%f,%f\n", matData[colorIndex][0], matData[colorIndex][1], matData[colorIndex][2]); fprintf(outfile, "radius %f\n", float(lineWidth) * DEFAULT_RADIUS); fprintf(outfile, "center (%f,%f,%f)\n}\n", ORDER(vec[0], vec[1], vec[2])); }
// draw a sphere void ArtDisplayDevice::sphere(float * spdata) { float vec[3]; float radius; // transform the world coordinates (transMat.top()).multpoint3d(spdata, vec); radius = scale_radius(spdata[3]); // draw the sphere fprintf(outfile, "sphere {\ncolour %f,%f,%f\n", matData[colorIndex][0], matData[colorIndex][1], matData[colorIndex][2]); fprintf(outfile, "radius %f\n", radius); fprintf(outfile, "center (%f,%f,%f)\n}\n", ORDER(vec[0], vec[1], vec[2])); }
// Read a value; use only after gpio_adc_sample() returns zero uint16_t gpio_adc_read(struct gpio_adc g) { adc_status.chan |= ADC_DONE; // Perform median filter on 5 read samples uint16_t *p = adc_status.samples; uint32_t v0 = p[0], v4 = p[1], v1 = p[2], v3 = p[3], v2 = p[4]; ORDER(v0, v4); ORDER(v1, v3); ORDER(v0, v1); ORDER(v3, v4); ORDER(v1, v3); ORDER(v1, v2); ORDER(v2, v3); return v2; }
/*
 * Allocate and initialise a graph node.
 *
 *   name   stored by pointer (not copied); the caller keeps ownership
 *   state  initial value of STATE(n)
 *   type   stored in n->type
 *   order  initial value of ORDER(n)
 *
 * The node and its data record are zero-initialised, so fields that are
 * not set explicitly below start out as 0/NULL instead of holding
 * indeterminate heap contents.  The predecessor and successor lists
 * start as NULL; the iteration-start, iteration-end and super-node
 * lists are created empty.
 *
 * Returns the new node, or NULL if memory for the node or its data
 * record could not be allocated (nothing is leaked in that case).
 */
Node make_node(char *name, vm_act_state state, int type, int order)
{
    Node n = (Node) calloc(1, sizeof(struct node));

    if (n == NULL)
        return NULL;

    n->data = (void *) calloc(1, sizeof(struct data));
    if (n->data == NULL) {
        /* The macros below write through n->data; bail out cleanly. */
        free(n);
        return NULL;
    }

    n->name = name;
    STATE(n) = state;
    n->type = type;
    n->script = "script";
    n->predecessors = NULL;
    n->successors = NULL;

    ITER_START(n) = 0;
    ITER_END(n) = 0;

    MARKED_0(n) = 0;
    MARKED_1(n) = 0;
    MARKED_2(n) = 0;
    MARKED_3(n) = 0;
    MARKED_4(n) = 0;

    ITER_START_NODES(n) = ListCreate();
    ITER_END_NODES(n) = ListCreate();
    SUPER_NODES(n) = ListCreate();

    ORDER(n) = order;

    return n;
}
/*
 * Prepare every node of g for execution under process id `pid`.
 *
 * Each node reachable from g->source through `next` receives a freshly
 * allocated data record, is passed to sanitize_node(), and has its PID,
 * STATE (ACT_NONE), ORDER (0, 1, 2, ... in list order), iteration flags
 * and three node lists initialised.  The graph-wide passes are then run
 * and the node following the source is marked ACT_READY via
 * mark_successors().
 *
 * NOTE(review): the malloc result is not checked, and any data record
 * already attached to a node is overwritten -- confirm callers never
 * pass nodes that already own one.
 */
void initialize_graph(Graph g, int pid)
{
    Node cur = g->source;
    int position = 0;

    while (cur != NULL) {
        cur->data = (void *) malloc(sizeof (struct data));
        sanitize_node(cur);

        PID(cur) = pid;
        STATE(cur) = ACT_NONE;
        ORDER(cur) = position++;

        ITER_START(cur) = FALSE;
        ITER_END(cur) = FALSE;

        ITER_START_NODES(cur) = ListCreate();
        ITER_END_NODES(cur) = ListCreate();
        SUPER_NODES(cur) = ListCreate();

        cur = cur->next;
    }

    add_super_node_lists(g);    /* add the node lists */
    mark_for_iteration(g);      /* mark beginning and end of iterations */
    add_iteration_lists(g);     /* add the iteration lists */
    sanitize(g);                /* sanitize markers */

    mark_successors(g->source->next, ACT_READY);
}
// draw a line (cylinder) from a to b void ArtDisplayDevice::line(float *a, float *b) { int i, j, test; float dirvec[3], unitdirvec[3]; float from[3], to[3], tmp1[3], tmp2[3]; float len; if(lineStyle == ::SOLIDLINE ) { // transform the world coordinates (transMat.top()).multpoint3d(a, from); (transMat.top()).multpoint3d(b, to); // draw the cylinder fprintf(outfile, "cylinder {\n"); fprintf(outfile, "colour %f,%f,%f\n", matData[colorIndex][0], matData[colorIndex][1], matData[colorIndex][2]); fprintf(outfile, "center(%f,%f,%f)\n", ORDER(from[0], from[1], from[2])); // first point fprintf(outfile, "center(%f,%f,%f)\n", ORDER(to[0], to[1], to[2])); // second point fprintf(outfile, "radius %f\n}\n", float(lineWidth)*DEFAULT_RADIUS); // radius } else if (lineStyle == ::DASHEDLINE ) { // transform the world coordinates (transMat.top()).multpoint3d(a, tmp1); (transMat.top()).multpoint3d(b, tmp2); // how to create a dashed line for(i=0;i<3;i++) { dirvec[i] = tmp2[i] - tmp1[i]; // vector from a to b } len = sqrtf( dirvec[0]*dirvec[0] + dirvec[1]*dirvec[1] + dirvec[2]*dirvec[2] ); for(i=0;i<3;i++) { unitdirvec[i] = dirvec[i] / sqrtf(len); // unit vector pointing from a to b } test = 1; i = 0; while( test == 1 ) { for(j=0;j<3;j++) { from[j] = tmp1[j] + (2*i)*DASH_LENGTH*unitdirvec[j]; to[j] = tmp1[j] + (2*i + 1)*DASH_LENGTH*unitdirvec[j]; } if( fabsf(tmp1[0] - to[0]) >= fabsf(dirvec[0]) ) { for(j=0;j<3;j++) { to[j] = tmp2[j]; } test = 0; } // draw the cylinder fprintf(outfile, "cylinder {\n"); fprintf(outfile, "colour %f,%f,%f\n", matData[colorIndex][0], matData[colorIndex][1], matData[colorIndex][2]); // first point fprintf(outfile, "center(%f,%f,%f)\n", ORDER(from[0], from[1], from[2])); // second point fprintf(outfile, "center(%f,%f,%f)\n", ORDER(to[0], to[1], to[2])); // radius fprintf(outfile, "radius %f\n}\n", float(lineWidth)*DEFAULT_RADIUS); i++; } } else { msgErr << "ArtDisplayDevice: Unknown line style " << lineStyle << sendmsg; } }
void ap_physical_gradients(double* restrict C, double* restrict B, double* restrict ref_dx, double* restrict ref_dy, LAPACKINDEX num_points, double* restrict dx, double* restrict dy) { double dx_unmapped[21*num_points]; double dy_unmapped[21*num_points]; int i; /* stuff for DGEMM */ LAPACKINDEX i_twentyone = 21; /* Calculate the physical-to-reference mapping. */ const double B_det_inv = 1/(B[ORDER(0, 0, 2, 2)]*B[ORDER(1, 1, 2, 2)] - B[ORDER(0, 1, 2, 2)]*B[ORDER(1, 0, 2, 2)]); const double B_inv00 = B_det_inv*B[ORDER(1, 1, 2, 2)]; const double B_inv01 = -B_det_inv*B[ORDER(0, 1, 2, 2)]; const double B_inv10 = -B_det_inv*B[ORDER(1, 0, 2, 2)]; const double B_inv11 = B_det_inv*B[ORDER(0, 0, 2, 2)]; /* * Perform the transformation using B inverse. This is equivalent to * putting the reference values in long columns side-by-side and * multiplying by B_inv. */ for (i = 0; i < 21*num_points; i++) { dx_unmapped[i] = B_inv00*ref_dx[i] + B_inv10*ref_dy[i]; dy_unmapped[i] = B_inv01*ref_dx[i] + B_inv11*ref_dy[i]; }
// Return the result of ORDER(anObject, 0), i.e. the set's ordering
// applied to anObject against a null second operand.
SInt32 OSOrderedSet::orderObject(const OSMetaClassBase * anObject)
{
    const SInt32 rank = ORDER(anObject, 0);

    return rank;
}
/*
 * fpu_add: add the two unpacked operands held in fe->fe_f1 (x) and
 * fe->fe_f2 (y).
 *
 * Returns a pointer to the result, which is one of the operand slots
 * (for the NaN / Inf / zero special cases), the value returned by
 * fpu_newnan() (Inf + -Inf), or fe->fe_f3 (an ordinary sum).
 *
 * Side effects visible here: may OR FPSCR_VXSNAN or FPSCR_VXISI into
 * fe->fe_cx, may modify y's sign (zero case) and may shift y's
 * mantissa in place through fpu_shr() when the exponents differ.
 */
struct fpn *
fpu_add(struct fpemu *fe)
{
	struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2, *r;
	u_int r0, r1, r2, r3;	/* words of the difference; r0 also holds the top word of the sum */
	int rd;			/* rounding-mode field extracted from fe->fe_fpscr */

	/*
	 * Put the `heavier' operand on the right (see fpu_emu.h).
	 * Then we will have one of the following cases, taken in the
	 * following order:
	 *
	 *  - y = NaN.  Implied: if only one is a signalling NaN, y is.
	 *	The result is y.
	 *  - y = Inf.  Implied: x != NaN (is 0, number, or Inf: the NaN
	 *    case was taken care of earlier).
	 *	If x = -y, the result is NaN.  Otherwise the result
	 *	is y (an Inf of whichever sign).
	 *  - y is 0.  Implied: x = 0.
	 *	If x and y differ in sign (one positive, one negative),
	 *	the result is +0 except when rounding to -Inf.  If same:
	 *	+0 + +0 = +0; -0 + -0 = -0.
	 *  - x is 0.  Implied: y != 0.
	 *	Result is y.
	 *  - other.  Implied: both x and y are numbers.
	 *	Do addition a la Hennessy & Patterson.
	 */
	DPRINTF(FPE_REG, ("fpu_add:\n"));
	DUMPFPN(FPE_REG, x);
	DUMPFPN(FPE_REG, y);
	DPRINTF(FPE_REG, ("=>\n"));
	ORDER(x, y);
	if (ISNAN(y)) {
		/*
		 * NOTE(review): VXSNAN is raised for every NaN that reaches
		 * this point; whether ISNAN distinguishes quiet from
		 * signalling NaNs is not visible here -- confirm.
		 */
		fe->fe_cx |= FPSCR_VXSNAN;
		DUMPFPN(FPE_REG, y);
		return (y);
	}
	if (ISINF(y)) {
		if (ISINF(x) && x->fp_sign != y->fp_sign) {
			/* Inf + -Inf: invalid operation, result is a new NaN */
			fe->fe_cx |= FPSCR_VXISI;
			return (fpu_newnan(fe));
		}
		DUMPFPN(FPE_REG, y);
		return (y);
	}
	/* the rounding mode decides the sign of a zero result */
	rd = ((fe->fe_fpscr) & FPSCR_RN);
	if (ISZERO(y)) {
		if (rd != FSR_RD_RM)	/* only -0 + -0 gives -0 */
			y->fp_sign &= x->fp_sign;
		else			/* any -0 operand gives -0 */
			y->fp_sign |= x->fp_sign;
		DUMPFPN(FPE_REG, y);
		return (y);
	}
	if (ISZERO(x)) {
		DUMPFPN(FPE_REG, y);
		return (y);
	}
	/*
	 * We really have two numbers to add, although their signs may
	 * differ.  Make the exponents match, by shifting the smaller
	 * number right (e.g., 1.011 => 0.1011) and increasing its
	 * exponent (2^3 => 2^4).  Note that we do not alter the exponents
	 * of x and y here.
	 */
	r = &fe->fe_f3;
	r->fp_class = FPC_NUM;
	if (x->fp_exp == y->fp_exp) {
		r->fp_exp = x->fp_exp;
		r->fp_sticky = 0;
	} else {
		if (x->fp_exp < y->fp_exp) {
			/*
			 * Try to avoid subtract case iii (see below).
			 * This also guarantees that x->fp_sticky = 0.
			 */
			SWAP(x, y);
		}
		/* now x->fp_exp > y->fp_exp */
		r->fp_exp = x->fp_exp;
		/* align y to x's exponent; fpu_shr's return value becomes the result's sticky */
		r->fp_sticky = fpu_shr(y, x->fp_exp - y->fp_exp);
	}
	r->fp_sign = x->fp_sign;
	if (x->fp_sign == y->fp_sign) {
		FPU_DECL_CARRY

		/*
		 * The signs match, so we simply add the numbers.  The result
		 * may be `supernormal' (as big as 1.111...1 + 1.111...1, or
		 * 11.111...0).  If so, a single bit shift-right will fix it
		 * (but remember to adjust the exponent).
		 */
		/* r->fp_mant = x->fp_mant + y->fp_mant */
		/* word 3 is added first and the carry propagates up to word 0 */
		FPU_ADDS(r->fp_mant[3], x->fp_mant[3], y->fp_mant[3]);
		FPU_ADDCS(r->fp_mant[2], x->fp_mant[2], y->fp_mant[2]);
		FPU_ADDCS(r->fp_mant[1], x->fp_mant[1], y->fp_mant[1]);
		FPU_ADDC(r0, x->fp_mant[0], y->fp_mant[0]);
		if ((r->fp_mant[0] = r0) >= FP_2) {
			(void) fpu_shr(r, 1);
			r->fp_exp++;
		}
	} else {
		FPU_DECL_CARRY

		/*
		 * The signs differ, so things are rather more difficult.
		 * H&P would have us negate the negative operand and add;
		 * this is the same as subtracting the negative operand.
		 * This is quite a headache.  Instead, we will subtract
		 * y from x, regardless of whether y itself is the negative
		 * operand.  When this is done one of three conditions will
		 * hold, depending on the magnitudes of x and y:
		 *   case i)   |x| > |y|.  The result is just x - y,
		 *	with x's sign, but it may need to be normalized.
		 *   case ii)  |x| = |y|.  The result is 0 (maybe -0)
		 *	so must be fixed up.
		 *   case iii) |x| < |y|.  We goofed; the result should
		 *	be (y - x), with the same sign as y.
		 * We could compare |x| and |y| here and avoid case iii,
		 * but that would take just as much work as the subtract.
		 * We can tell case iii has occurred by an overflow.
		 *
		 * N.B.: since x->fp_exp >= y->fp_exp, x->fp_sticky = 0.
		 */
		/* r->fp_mant = x->fp_mant - y->fp_mant */
		/* y's sticky bits seed the carry for the lowest word */
		FPU_SET_CARRY(y->fp_sticky);
		FPU_SUBCS(r3, x->fp_mant[3], y->fp_mant[3]);
		FPU_SUBCS(r2, x->fp_mant[2], y->fp_mant[2]);
		FPU_SUBCS(r1, x->fp_mant[1], y->fp_mant[1]);
		FPU_SUBC(r0, x->fp_mant[0], y->fp_mant[0]);
		if (r0 < FP_2) {
			/* cases i and ii */
			if ((r0 | r1 | r2 | r3) == 0) {
				/* case ii */
				r->fp_class = FPC_ZERO;
				/* exact zero: sign bit is set only when rd equals FSR_RD_RM */
				r->fp_sign = rd == FSR_RD_RM;
				return (r);
			}
		} else {
			/*
			 * Oops, case iii.  This can only occur when the
			 * exponents were equal, in which case neither
			 * x nor y have sticky bits set.  Flip the sign
			 * (to y's sign) and negate the result to get y - x.
			 */
#ifdef DIAGNOSTIC
			if (x->fp_exp != y->fp_exp || r->fp_sticky)
				panic("fpu_add");
#endif
			r->fp_sign = y->fp_sign;
			/* multiword negate: r = 0 - r, borrow rippling from word 3 to word 0 */
			FPU_SUBS(r3, 0, r3);
			FPU_SUBCS(r2, 0, r2);
			FPU_SUBCS(r1, 0, r1);
			FPU_SUBC(r0, 0, r0);
		}
		r->fp_mant[3] = r3;
		r->fp_mant[2] = r2;
		r->fp_mant[1] = r1;
		r->fp_mant[0] = r0;
		/* the subtraction may have cleared leading bits; renormalize */
		if (r0 < FP_1)
			fpu_norm(r);
	}
	DUMPFPN(FPE_REG, r);
	return (r);
}
void ap_matrix_betaplane(double* restrict C, double* restrict B, double* restrict ref_values, double* restrict ref_dx, double* restrict ref_dy, double* restrict weights, LAPACKINDEX num_points, double* restrict betaplane) { int i; double values[21*num_points]; double dx[21*num_points]; double dy[21*num_points]; double weights_scaled[num_points]; /* stuff for DGEMM. */ LAPACKINDEX i_twentyone = 21; const double jacobian = fabs(B[ORDER(0, 0, 2, 2)]*B[ORDER(1, 1, 2, 2)] - B[ORDER(0, 1, 2, 2)]*B[ORDER(1, 0, 2, 2)]); ap_physical_gradients(C, B, ref_dx, ref_dy, num_points, dx, dy); ap_physical_values(C, ref_values, num_points, values); /* scale the weights by the jacobian. */ for (i = 0; i < num_points; i++) { weights_scaled[i] = weights[i]*jacobian; } /* * scale the function values by the weights and determinant. Then * perform matrix multiplication. */ ap_diagonal_multiply(21, num_points, values, weights_scaled);
struct fpn * fpu_div(struct fpemu *fe) { struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2; u_int q, bit; u_int r0, r1, r2, r3, d0, d1, d2, d3, y0, y1, y2, y3; FPU_DECL_CARRY /* * Since divide is not commutative, we cannot just use ORDER. * Check either operand for NaN first; if there is at least one, * order the signalling one (if only one) onto the right, then * return it. Otherwise we have the following cases: * * Inf / Inf = NaN, plus NV exception * Inf / num = Inf [i.e., return x] * Inf / 0 = Inf [i.e., return x] * 0 / Inf = 0 [i.e., return x] * 0 / num = 0 [i.e., return x] * 0 / 0 = NaN, plus NV exception * num / Inf = 0 * num / num = num (do the divide) * num / 0 = Inf, plus DZ exception */ DPRINTF(FPE_REG, ("fpu_div:\n")); DUMPFPN(FPE_REG, x); DUMPFPN(FPE_REG, y); DPRINTF(FPE_REG, ("=>\n")); if (ISNAN(x) || ISNAN(y)) { ORDER(x, y); fe->fe_cx |= FPSCR_VXSNAN; DUMPFPN(FPE_REG, y); return (y); } /* * Need to split the following out cause they generate different * exceptions. */ if (ISINF(x)) { if (x->fp_class == y->fp_class) { fe->fe_cx |= FPSCR_VXIDI; return (fpu_newnan(fe)); } DUMPFPN(FPE_REG, x); return (x); } if (ISZERO(x)) { fe->fe_cx |= FPSCR_ZX; if (x->fp_class == y->fp_class) { fe->fe_cx |= FPSCR_VXZDZ; return (fpu_newnan(fe)); } DUMPFPN(FPE_REG, x); return (x); } /* all results at this point use XOR of operand signs */ x->fp_sign ^= y->fp_sign; if (ISINF(y)) { x->fp_class = FPC_ZERO; DUMPFPN(FPE_REG, x); return (x); } if (ISZERO(y)) { fe->fe_cx = FPSCR_ZX; x->fp_class = FPC_INF; DUMPFPN(FPE_REG, x); return (x); } /* * Macros for the divide. See comments at top for algorithm. * Note that we expand R, D, and Y here. 
*/ #define SUBTRACT /* D = R - Y */ \ FPU_SUBS(d3, r3, y3); FPU_SUBCS(d2, r2, y2); \ FPU_SUBCS(d1, r1, y1); FPU_SUBC(d0, r0, y0) #define NONNEGATIVE /* D >= 0 */ \ ((int)d0 >= 0) #ifdef FPU_SHL1_BY_ADD #define SHL1 /* R <<= 1 */ \ FPU_ADDS(r3, r3, r3); FPU_ADDCS(r2, r2, r2); \ FPU_ADDCS(r1, r1, r1); FPU_ADDC(r0, r0, r0) #else #define SHL1 \ r0 = (r0 << 1) | (r1 >> 31), r1 = (r1 << 1) | (r2 >> 31), \ r2 = (r2 << 1) | (r3 >> 31), r3 <<= 1 #endif #define LOOP /* do ... while (bit >>= 1) */ \ do { \ SHL1; \ SUBTRACT; \ if (NONNEGATIVE) { \ q |= bit; \ r0 = d0, r1 = d1, r2 = d2, r3 = d3; \ } \ } while ((bit >>= 1) != 0) #define WORD(r, i) /* calculate r->fp_mant[i] */ \ q = 0; \ bit = 1 << 31; \ LOOP; \ (x)->fp_mant[i] = q /* Setup. Note that we put our result in x. */ r0 = x->fp_mant[0]; r1 = x->fp_mant[1]; r2 = x->fp_mant[2]; r3 = x->fp_mant[3]; y0 = y->fp_mant[0]; y1 = y->fp_mant[1]; y2 = y->fp_mant[2]; y3 = y->fp_mant[3]; bit = FP_1; SUBTRACT; if (NONNEGATIVE) { x->fp_exp -= y->fp_exp; r0 = d0, r1 = d1, r2 = d2, r3 = d3; q = bit; bit >>= 1; } else {
/*
 * The multiplication algorithm for normal numbers is as follows:
 *
 * The fraction of the product is built in the usual stepwise fashion.
 * Each step consists of shifting the accumulator right one bit
 * (maintaining any guard bits) and, if the next bit in y is set,
 * adding the multiplicand (x) to the accumulator.  Then, in any case,
 * we advance one bit leftward in y.  Algorithmically:
 *
 *	A = 0;
 *	for (bit = 0; bit < FP_NMANT; bit++) {
 *		sticky |= A & 1, A >>= 1;
 *		if (Y & (1 << bit))
 *			A += X;
 *	}
 *
 * (X and Y here represent the mantissas of x and y respectively.)
 * The resultant accumulator (A) is the product's mantissa.  It may
 * be as large as 11.11111... in binary and hence may need to be
 * shifted right, but at most one bit.
 *
 * Since we do not have efficient multiword arithmetic, we code the
 * accumulator as four separate words, just like any other mantissa.
 * We use local variables in the hope that this is faster than memory.
 * We keep x->fp_mant in locals for the same reason.
 *
 * In the algorithm above, the bits in y are inspected one at a time.
 * We will pick them up 32 at a time and then deal with those 32, one
 * at a time.  Note, however, that we know several things about y:
 *
 *    - the guard and round bits at the bottom are sure to be zero;
 *
 *    - often many low bits are zero (y is often from a single or double
 *      precision source);
 *
 *    - bit FP_NMANT-1 is set, and FP_1*2 fits in a word.
 *
 * We can also test for 32-zero-bits swiftly.  In this case, the center
 * part of the loop---setting sticky, shifting A, and not adding---will
 * run 32 times without adding X to A.  We can do a 32-bit shift faster
 * by simply moving words.  Since zeros are common, we optimize this case.
 * Furthermore, since A is initially zero, we can omit the shift as well
 * until we reach a nonzero word.
 */
/*
 * fpu_mul: multiply the two unpacked operands held in fe->fe_f1 (x)
 * and fe->fe_f2 (y).
 *
 * Returns a pointer to the result: y for the NaN and Inf cases, the
 * value returned by fpu_newnan() for Inf * 0, and x (overwritten in
 * place with the product) otherwise.  May OR FPSCR_VXSNAN or
 * FPSCR_VXIMZ into fe->fe_cx.
 */
struct fpn *
fpu_mul(struct fpemu *fe)
{
	struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2;
	u_int a3, a2, a1, a0, x3, x2, x1, x0, bit, m;	/* a* = accumulator A, x* = copy of x's mantissa */
	int sticky;
	FPU_DECL_CARRY;

	/*
	 * Put the `heavier' operand on the right (see fpu_emu.h).
	 * Then we will have one of the following cases, taken in the
	 * following order:
	 *
	 *  - y = NaN.  Implied: if only one is a signalling NaN, y is.
	 *	The result is y.
	 *  - y = Inf.  Implied: x != NaN (is 0, number, or Inf: the NaN
	 *    case was taken care of earlier).
	 *	If x = 0, the result is NaN.  Otherwise the result
	 *	is y, with its sign reversed if x is negative.
	 *  - x = 0.  Implied: y is 0 or number.
	 *	The result is 0 (with XORed sign as usual).
	 *  - other.  Implied: both x and y are numbers.
	 *	The result is x * y (XOR sign, multiply bits, add exponents).
	 */
	DPRINTF(FPE_REG, ("fpu_mul:\n"));
	DUMPFPN(FPE_REG, x);
	DUMPFPN(FPE_REG, y);
	DPRINTF(FPE_REG, ("=>\n"));

	ORDER(x, y);
	if (ISNAN(y)) {
		/*
		 * NOTE(review): the NaN's sign is XORed with x's sign and
		 * VXSNAN is raised for every NaN that reaches this point;
		 * whether ISNAN distinguishes quiet from signalling NaNs
		 * is not visible here -- confirm.
		 */
		y->fp_sign ^= x->fp_sign;
		fe->fe_cx |= FPSCR_VXSNAN;
		DUMPFPN(FPE_REG, y);
		return (y);
	}
	if (ISINF(y)) {
		if (ISZERO(x)) {
			/* Inf * 0: invalid operation, result is a new NaN */
			fe->fe_cx |= FPSCR_VXIMZ;
			return (fpu_newnan(fe));
		}
		y->fp_sign ^= x->fp_sign;
		DUMPFPN(FPE_REG, y);
		return (y);
	}
	if (ISZERO(x)) {
		x->fp_sign ^= y->fp_sign;
		DUMPFPN(FPE_REG, x);
		return (x);
	}

	/*
	 * Setup.  In the code below, the mask `m' will hold the current
	 * mantissa word from y.  The variable `bit' denotes the bit
	 * within m.  We also define some macros to deal with everything.
	 */
	x3 = x->fp_mant[3];
	x2 = x->fp_mant[2];
	x1 = x->fp_mant[1];
	x0 = x->fp_mant[0];
	sticky = a3 = a2 = a1 = a0 = 0;

#define	ADD	/* A += X */ \
	FPU_ADDS(a3, a3, x3); \
	FPU_ADDCS(a2, a2, x2); \
	FPU_ADDCS(a1, a1, x1); \
	FPU_ADDC(a0, a0, x0)

#define	SHR1	/* A >>= 1, with sticky */ \
	sticky |= a3 & 1, a3 = (a3 >> 1) | (a2 << 31), \
	a2 = (a2 >> 1) | (a1 << 31), a1 = (a1 >> 1) | (a0 << 31), a0 >>= 1

#define	SHR32	/* A >>= 32, with sticky */ \
	sticky |= a3, a3 = a2, a2 = a1, a1 = a0, a0 = 0

#define	STEP	/* each 1-bit step of the multiplication */ \
	SHR1; if (bit & m) { ADD; }; bit <<= 1

	/*
	 * We are ready to begin.  The multiply loop runs once for each
	 * of the four 32-bit words.  Some words, however, are special.
	 * As noted above, the low order bits of Y are often zero.  Even
	 * if not, the first loop can certainly skip the guard bits.
	 * The last word of y has its highest 1-bit in position FP_NMANT-1,
	 * so we stop the loop when we move past that bit.
	 */
	/* lowest word of y: start above the FP_NG guard bits */
	if ((m = y->fp_mant[3]) == 0) {
		/* SHR32; */			/* unneeded since A==0 */
	} else {
		bit = 1 << FP_NG;
		do {
			STEP;
		} while (bit != 0);	/* bit becomes 0 once shifted out of the word */
	}
	if ((m = y->fp_mant[2]) == 0) {
		SHR32;
	} else {
		bit = 1;
		do {
			STEP;
		} while (bit != 0);
	}
	if ((m = y->fp_mant[1]) == 0) {
		SHR32;
	} else {
		bit = 1;
		do {
			STEP;
		} while (bit != 0);
	}
	m = y->fp_mant[0];		/* definitely != 0 */
	bit = 1;
	do {
		STEP;
	} while (bit <= m);		/* stop once bit has passed m's highest set bit */

	/*
	 * Done with mantissa calculation.  Get exponent and handle
	 * 11.111...1 case, then put result in place.  We reuse x since
	 * it already has the right class (FP_NUM).
	 */
	m = x->fp_exp + y->fp_exp;	/* m is reused here to hold the result exponent */
	if (a0 >= FP_2) {
		SHR1;
		m++;
	}
	x->fp_sign ^= y->fp_sign;
	x->fp_exp = m;
	x->fp_sticky = sticky;
	x->fp_mant[3] = a3;
	x->fp_mant[2] = a2;
	x->fp_mant[1] = a1;
	x->fp_mant[0] = a0;

	DUMPFPN(FPE_REG, x);
	return (x);
}