_f_int8 _UBOUND0_8(DopeVectorType * source, _f_int * dimptr) { _f_int8 iresult; int dim; int rank; /* If source is a pointer/allocatable array, it must be * associated/allocated. */ if (source->p_or_a && !source->assoc) _lerror (_LELVL_ABORT, FENMPTAR, "UBOUND"); /* argument DIM must be within source array rank */ rank = source->n_dim; dim = *dimptr - 1; if (dim < 0 || dim >= rank) _lerror (_LELVL_ABORT, FENMSCDM, "UBOUND"); /* Return low_bound+extent-1 for nonzero extent, else return zero */ if(source->dimension[dim].extent != 0) iresult = (_f_int8) (source->dimension[dim].low_bound + source->dimension[dim].extent) - 1; else iresult = (_f_int8) 0; return(iresult); }
_f_int4 _SIZE_4 (DopeVectorType * source, _f_int *dimptr) { _f_int4 iresult; int dim; int rank; int loopj; /* If source is a pointer/allocatable array, it must be * associated/allocated. */ if (source->p_or_a && !source->assoc) _lerror (_LELVL_ABORT, FENMPTAR, "SIZE"); rank = source->n_dim; if (dimptr == NULL) { iresult = 1; /* Retrieve product of extents */ for (loopj = 0; loopj < rank; loopj++) iresult = iresult * source->dimension[loopj].extent; } else { /* argument DIM must be within source array rank */ dim = *dimptr - 1; if (dim < 0 || dim >= rank) _lerror (_LELVL_ABORT, FENMSCDM, "SIZE"); /* Return extent */ iresult = source->dimension[dim].extent; } return(iresult); }
_f_real8 _NEAREST(_f_real8 x, _f_real8 s) { #ifdef KEY /* Bug 10771 */ if (s == (_f_real8) 0.0) { _lerror (_LELVL_ABORT, FENEARZS); } _f_int8 infinity = signbit(s) ? (0x8000000000000000ull | IEEE_64_INFINITY) : IEEE_64_INFINITY; _f_real8 result = nextafter(x, * (_f_real8 *) &infinity); return result; #elif 0 /* KEY Bug 3399 */ /* See comment in _NEAREST_4 */ REGISTER_8 x_reg; int positive_s = (s > (_f_real8) 0.0); if (s == (_f_real8) 0.0) { _lerror (_LELVL_ABORT, FENEARZS); } x_reg.f = x; if (IEEE_64_EXPO_ALL_ONES(x_reg.ui)) { return x; } if (x == (_f_real8) 0.0) { /* either +0.0 or -0.0 */ x_reg.ui = positive_s ? 1 : (IEEE_64_SIGN_BIT | 1); } else { int increment = (positive_s == (x > (_f_real8) 0.0)) ? 1 : -1; x_reg.ui += increment; } return x_reg.f; #else REGISTER_8 s1, s2; s1.f = x; if (s == 0.0) { _lerror (_LELVL_ABORT, FENEARZS); } s2.ui = (s1.f > 0) ? LL_CONST(0x1) : -(LL_CONST(0x1)); if (s1.f == 0.0) { s1.f = (s > 0.0) ? TINY_REAL8_F90 : -TINY_REAL8_F90; } else if (s > 0.0) { s1.ui += s2.ui; } else { s1.ui -= s2.ui; } if (isnormal64(s1.ui)) return s1.f; if (x > 1.0 || x < -1.0) return s1.f; return (0.0); #endif /* KEY */ }
void _BOUNDS_ERROR( char *file, /* Fortran routine containing error */ int *line, /* Line number in Fortran routine */ char *variable, /* arrayname with out-of-bounds subscript */ int *dim, /* Dimension number of the array */ int *lowerbnd, /* Lower bound of dimension dim */ int *upperbnd, /* Upper bound of dimension dim */ int sub[1], /* Out-of-bounds subscript value */ int *count) /* Count/flag for number of messages */ { int *retcnt; /* ptr to static arg count word */ int intcnt = 0; /* local count if no count passed */ #ifdef _UNICOS /* Use local variable if count argument not passed. */ if (_numargs() < 8) retcnt = &intcnt; else #endif retcnt = count; if ((*retcnt)++ == 0) { #ifdef KEY /* Bug 7969 */ if (want_abort()) { (void) _lerror(_LELVL_MSG, FWARGSBV, sub[0], *dim, variable, *line, file, *lowerbnd, *upperbnd); do_abort(); } #endif /* KEY Bug 7969 */ (void) _fwarn(FWARGSBV, sub[0], *dim, variable, *line, file, *lowerbnd, *upperbnd); } return; }
_f_real4 _NEAREST_4_16(_f_real4 x, _f_real16 s) { REGISTER_4 s1, s2, s3; s1.f = x; if (s == 0.0) { _lerror (_LELVL_ABORT, FENEARZS); } #if defined (_CRAY1) && defined(_CRAYIEEE) s3.ui = s1.ui & ~(IEEE_64_SIGN_BIT); s2.ui = (s1.f > 0) ? LL_CONST(0x20000000) : -(LL_CONST(0x20000000)); if ((_f_real4) TINY_REAL4_F90 > s3.f) s1.f = 0.0; #else s2.ui = (s1.f > 0) ? 0x1 : -(0x1); #endif if (s1.f == 0.0) { s1.f = (s > 0.0) ? (_f_real4) TINY_REAL4_F90 : (_f_real4) -TINY_REAL4_F90; } else if (s > 0.0) { s1.ui += s2.ui; } else { s1.ui -= s2.ui; } #if defined (_CRAY1) && defined(_CRAYIEEE) if (isnormal64(s1.ui)) #else if (isnormal32(s1.ui)) #endif return s1.f; if (x > 1.0 || x < -1.0) return s1.f; return (0.0); }
void AQWAIT( _f_int *aqp, _f_int *status, _f_int *reply) { AQFIL *f; struct fflistreq *nxtq; _f_int dummy, *lreply; /* * UNICOS 8.0 and previous quietly permitted fewer than 2 arguments, * even though our documentatiokn for AQWAIT has required >= 2 args * for some time. Do the service of printing an error message if a * dusty deck code happens to use < 2 arguments. */ if (_numargs() < 2) _lerror(_LELVL_ABORT, FEARGCNT, "AQWAIT", _numargs(), "2 or 3"); /* * reply is an optional argument. */ lreply = reply; if (_numargs() < 3) lreply = &dummy; f = (AQFIL *) *aqp; if (f == NULL || f->aq_chk != AQ_CHK) { *status = -FEAQBADH; /* file handle is not valid */ return; } if (f->aq_busy == f->aq_nxtq) { *status = IDLE; return; } *status = OK; AQ_LOCK(aq_lkbusy); nxtq = f->aq_nxtq; _aqwait(f, status, lreply, nxtq); AQ_UNLOCK(aq_lkbusy); if (*status < 0 && _numargs() <= 1) _lerror(_LELVL_ABORT, -(*status)); return; }
void _TASK_DV_GETFIRST_ERROR( char *variable, /* allocatable array or Fortran pointer */ char *file, /* Fortran routine containing error */ int *lineno) /* Line number in Fortran routine */ { (void) _lerror(_LELVL_ABORT, FENGFLCL, variable, file, *lineno); return; }
_f_real16 _NEAREST_16_8(_f_real16 x, _f_real8 s) { #if defined(_WORD32) union ldble_float { _f_real16 whole; unsigned long long ui[1]; } f,rslt; unsigned long long s2, s3, s4; #else union ldble_float { _f_real16 whole; unsigned long ui[1]; } f,rslt; unsigned long s2, s3, s4; #endif rslt.whole = x; f.whole = x; if (s == 0.0) { _lerror (_LELVL_ABORT, FENEARZS); } s2 = (rslt.whole > 0) ? LL_CONST(0x1) : -(LL_CONST(0x1)); if (rslt.whole > 0) { /* if x > 0 and s > 0, check for all 7's in 2nd word */ s3 = IEEE_128_64_MANT2; /* if x > 0 and s < 0, check for all zeros in 2nd word */ s4 = LL_CONST(0x0); } else { /* if x < 0 and s > 0, check for all zeros in 2nd word */ s3 = LL_CONST(0x0); /* if x < 0 and s < 0, check for all 7's in 2nd word */ s4 = IEEE_128_64_MANT2; } if (rslt.whole == 0.0) { rslt.whole = (s > 0.0) ? TINY_REAL16_F90 : -TINY_REAL16_F90; } else if (s > 0.0) { rslt.ui[1] += s2; if (f.ui[1] == s3) { rslt.ui[0] += s2; } } else { rslt.ui[1] -= s2; if (f.ui[1] == s4) { rslt.ui[0] -= s2; } } if (isnormal128(rslt.whole)) return rslt.whole; if (x > 1.0 || x < -1.0) return rslt.whole; return (0.0); }
void __f90_bounds_check(char *procedure_name, _f_int4 line_number, char *array_name, _f_int4 axis_number) { char *unknown_nm = "name_unknown"; char *abort_now = NULL; char *rtn_nm = "__f90_bounds_check"; /* note mips f90 compiler appends extra characters at * the end of the procedure name. */ /* MIPSpro 7.2 and 7.2.1 documentation assumed that the * routine would abort if this environment variable is * is set. Otherwise, the message is just a warning. */ abort_now = getenv("F90_BOUNDS_CHECK_ABORT"); if (abort_now) { if (*abort_now == 'y' || *abort_now == 'Y') { if (array_name) { (void) _lerror(_LELVL_MSG, FWARGSVB, axis_number, array_name, line_number, procedure_name, rtn_nm); } else { (void) _lerror(_LELVL_MSG, FWARGSVB, axis_number, unknown_nm, line_number, procedure_name, rtn_nm); } /* cleanup the fortran units before abort */ _fcleanup(); abort(); } } if (array_name) (void) _fwarn(FWARGSVB,axis_number, array_name, line_number, procedure_name, rtn_nm); else (void) _fwarn(FWARGSVB,axis_number, unknown_nm, line_number, procedure_name, rtn_nm); return; }
void RNLCOMM(_fcd chr, _f_int *mode) { int thechar; if (_numargs() != (sizeof(_fcd) + sizeof(long *))/ sizeof(long)) _lerror(_LELVL_ABORT,FEARGLST, "RNLCOMM"); thechar = _getfchar(chr); TOGGLE_CHAR(thechar, MRNLCOMM, *mode); return; }
/* * Error handler for an array syntax conformance warning. * This entry is called by the f90 compiler on IRIX only. * * Input Arguments: * file - File name in which error occurred. * line - Line number in file. * dim - Dimension number which has a size mismatch. * extent1 - One of two mismatched 64-bit extents of dimension dim. * extent2 - One of two mismatched 64-bit extents of dimension dim. * * ON MIPS, if the environment varaiable is set to Y(ES), produce an * error message with the information and then abort. Otherwise, * produce a warning for the conformity check. * * When the input dimension is zero, this routine is being called * from an inline version of a transformational function such as * MATMUL and the use of a dimension would be confusing since the * first dimension of one argument and the second dimension of the * other argument are mismatched. * * When the input dimension is nonzero, this routine is being called * when the specified dimension is the same for both arguments. * * The message contains the name of this routine for debugging. */ void __f90_conform_check( char *file, int line, int dim, long long extent1, long long extent2) { char *abort_now = NULL; char *rtn_nm = "__f90_conform_check"; abort_now = getenv("F90_CONFORM_CHECK_ABORT"); if (extent1 < 0) extent1 = 0; if (extent2 < 0) extent2 = 0; /* abort only if environment variable is present and is Y(es) */ if (abort_now && (*abort_now == 'y' || *abort_now == 'Y')) { /* Use proper message depending on value of dimension. */ if (dim != 0) (void) _lerror(_LELVL_MSG,FWARGDMD, dim, line, file, extent1, extent2, rtn_nm); else (void) _lerror(_LELVL_MSG,FWARGDMZ, line, file, extent1, extent2, rtn_nm); _fcleanup(); abort(); } /* Use proper message depending on value of dimension. */ if (dim != 0) (void) _fwarn(FWARGDMD, dim, line, file, extent1, extent2, rtn_nm); else (void) _fwarn(FWARGDMZ, line, file, extent1, extent2, rtn_nm); }
void RNLSEP(_fcd chr, _f_int *mode) { int thechar; if (_numargs() != (sizeof(_fcd) + sizeof(long*))/sizeof(long)) _lerror(_LELVL_ABORT,FEARGLST, "RNLSEP"); thechar = _getfchar(chr); if (thechar == ' ') _BLNKSEP = *mode; TOGGLE_CHAR(thechar, MRNLSEP, *mode); return; }
/* * Error handler for an of out of bounds substring. * * Input: * file - File name in which error occurred. * line - Line number in file. * variable - Name of array which had an out of bounds substring. * size - Substring size. * start - Out of bounds substring start. * length - Out of bounds substring length. * count - Static count/flag to indicate if this message was * already given for this statement. */ void _SBOUNDS_ERROR( char *file, int *line, char *variable, int *size, int *subst, int *subln, int *count ) { int *retcnt; /* ptr to static arg count word */ int intcnt = 0; /* local count if no count passed */ int endst; #ifdef _UNICOS /* Use local variable if count argument not passed. */ if (_numargs() < 7) retcnt = &intcnt; else #endif retcnt = count; /* if substring length is zero or negative, not incorrect */ if ( *subln > 0) { if ((*retcnt)++ == 0) { /* calculate substring end. * subln is calculated by (ln = s2 - s1 + 1) * endst is calculated by (s2 = ln + s1 - 1) */ endst = *subln + *subst - 1; #ifdef KEY /* Bug 7969 */ if (want_abort()) { (void) _lerror(_LELVL_MSG, FWARGSTR, *subst, endst, variable, *line, file, *size); do_abort(); } #endif /* KEY Bug 7969 */ (void) _fwarn (FWARGSTR, *subst, endst, variable, *line, file, *size); } } return; }
/* * NTON integer*32 raised to an integer*32 */ _f_int4 _NTON( _f_int4 x, _f_int4 y ) { _f_int4 base, result, i; if (x == 0) { if (y == 0) { #if defined(__mips) || defined(_LITTLE_ENDIAN) return(1); #else _lerror(_LELVL_ABORT, FEIPOWZR); #endif } return(0); } if (y < 0) { result = 0; if ((x == 1) || (x == -1)) { result = 1; if (((y & 1) == 1) && (x == -1)) result = -1; } } else { if (y == 0) return(1); base = x; if (x < 0) base = -x; result = 1; i = y; while (i != 0) { if ((i & 1) == 1) result *= base; base *= base; i = (unsigned) i >> 1; }; if ((x < 0) && ((y & 1) == 1)) result = -result; } return(result); }
_f_real8 _NEAREST_8_16(_f_real8 x, _f_real16 s) { REGISTER_8 s1, s2; s1.f = x; if (s == 0.0) { _lerror (_LELVL_ABORT, FENEARZS); } s2.ui = (s1.f > 0) ? LL_CONST(0x1) : -(LL_CONST(0x1)); if (s1.f == 0.0) { s1.f = (s > 0.0) ? TINY_REAL8_F90 : -TINY_REAL8_F90; } else if (s > 0.0) { s1.ui += s2.ui; } else { s1.ui -= s2.ui; } if (isnormal64(s1.ui)) return s1.f; if (x > 1.0 || x < -1.0) return s1.f; return (0.0); }
/* * NTOI - integer*32 raised to an integer*64 */ _f_int8 _NTOI( _f_int4 x, _f_int8 y ) { long base, result, i; if (x == 0) { if (y == 0) { _lerror(_LELVL_ABORT, FEIPOWZR); } return(0); } if (y < 0) { result = 0; if ((x == 1) || (x == -1)) { result = 1; if (((y & 1) == 1) && (x == -1)) result = -1; } } else { if (y == 0) return(1); base = x; if (x < 0) base = -x; result = 1; i = y; while (i != 0) { if ((i & 1) == 1) result *= base; base *= base; i = (unsigned) i >> 1; }; if ((x < 0) && ((y & 1) == 1)) result = -result; }; return(result); }
/* NEAREST - return the nearest different machine representable number in a * given direction s for 32-bit and 64-bit values. Returns * the argument x if s = zero. The result is undefined in f90 * when s = zero. */ _f_real4 _NEAREST_4(_f_real4 x, _f_real4 s) { #ifdef KEY /* Bug 10771 */ /* Previous approach (in "elif") didn't treat infinity correctly and didn't * signal exceptions correctly. Let's try using the C library functions in * hopes that they know what they're doing. */ if (s == (_f_real4) 0.0) { _lerror (_LELVL_ABORT, FENEARZS); } _f_int4 infinity = signbit(s) ? (0x80000000 | IEEE_32_INFINITY) : IEEE_32_INFINITY; _f_real4 result = nextafterf(x, * (_f_real4 *) &infinity); return result; #elif 0 /* KEY Bug 3399 */ /* * We want "nearest(nearest(x, s), -s) == x" to be true so long as * IEEE infinity and NaN aren't involved. We do allow largest/smallest * number to turn into infinity, but we don't allowe infinity to turn * back into largest/smallest number. * * Here's a summary of the unsigned bit patterns for IEEE floating * point: * * 1 11-11 11------11 "Largest magnitude negative" NaN * 1 11-11 00------01 "Smallest magnitude negative" NaN * 1 11-11 00------00 Negative infinity * 1 11-10 11------11 Largest-magnitude negative normalized * 1 00-01 00------00 Smallest-magnitude negative normalized * 1 00-00 11------11 Largest-magnitude negative denorm * 1 00-00 00------01 Smallest-magnitude negative denorm * 1 00-00 00------00 Negative zero * 0 11-11 11------11 "Largest positive" NaN * 0 11-11 00------01 "Smallest positive" NaN * 0 11-11 00------00 Positive infinity * 0 11-10 11------11 Largest-magnitude positive normalized * 0 00-01 00------00 Smallest-magnitude positive normalized * 0 00-00 11------11 Largest-magnitude positive denorm * 0 00-00 00------01 Smallest-magnitude positive denorm * 0 00-00 00------00 Zero * * Our strategy is: * 1. s == 0 is a fatal error * 2. if x == infinity or NaN, return it unchanged * 3. if x == +0 or -0, return smallest-magnitude denorm whose sign * matches that of s * 4. if the signs of x and s match, add 1 to bit pattern of x * (increasing its floating-point magnitude); else subtract 1 from * bit pattern of x (decreasing its magnitude) */ REGISTER_4 x_reg; int positive_s = (s > (_f_real4) 0.0); if (s == (_f_real4) 0.0) { _lerror (_LELVL_ABORT, FENEARZS); } x_reg.f = x; if (IEEE_32_EXPO_ALL_ONES(x_reg.ui)) { return x; } if (x == (_f_real4) 0.0) { /* either +0.0 or -0.0 */ x_reg.ui = positive_s ? 1 : (IEEE_32_SIGN_BIT | 1); } else { int increment = (positive_s == (x > (_f_real4) 0.0)) ? 1 : -1; x_reg.ui += increment; } return x_reg.f; #else REGISTER_4 s1, s2, s3; s1.f = x; if (s == (_f_real4) 0.0) { _lerror (_LELVL_ABORT, FENEARZS); } #if defined (_CRAY1) && defined(_CRAYIEEE) s3.ui = s1.ui & ~(IEEE_64_SIGN_BIT); s2.ui = (s1.f > 0) ? LL_CONST(0x20000000) : -(LL_CONST(0x20000000)); if ((_f_real4) TINY_REAL4_F90 > s3.f) s1.f = 0.0; #else s2.ui = (s1.f > 0) ? 0x1 : -(0x1); #endif if (s1.f == (_f_real4) 0.0) { s1.f = (s > (_f_real4) 0.0) ? (_f_real4) TINY_REAL4_F90 : (_f_real4) -TINY_REAL4_F90; } else if (s > (_f_real4) 0.0) { s1.ui += s2.ui; } else { s1.ui -= s2.ui; } #if defined (_CRAY1) && defined(_CRAYIEEE) if (isnormal64(s1.ui)) #else if (isnormal32(s1.ui)) #endif return s1.f; if (x > 1.0 || x < -1.0) return (s1.f); return (0.0); #endif /* KEY */ }
void __all ( DopeVectorType * result, DopeVectorType * mask, _f_int *dimension) { int c_dim; /* C form of input dimension */ int other_dim; /* other dimension in rank-2 */ int num_elts = 1; /* elts in result array */ long nbytes = 0; /* bytes to malloc */ _f_log * irptr; /* ptr to result array */ _f_log * imptr; /* ptr to mask array */ _f_log4 * i4rptr; /* ptr to result array */ _f_log4 * i4mptr; /* ptr to mask array */ _f_log8 * i8rptr; /* ptr to result array */ _f_log8 * i8mptr; /* ptr to mask array */ #ifdef _F_LOG1 _f_log1 * i1rptr; /* ptr to result array */ _f_log1 * i1mptr; /* ptr to mask array */ #endif #ifdef _F_LOG2 _f_log2 * i2rptr; /* ptr to result array */ _f_log2 * i2mptr; /* ptr to mask array */ #endif long i, j; /* index variables */ long indx, jndx; /* loop indices */ int done, stop; /* work done indicators */ int el_len; /* LTOB length indicator */ int mshftct=0; /* mask amount to shift index */ int rshftct=0; /* result amount to shift index */ /* Per-dimension arrays */ long current_place[MAXDIM-1]; /* current place*/ long mask_offset[MAXDIM-1]; /* mask offset */ long mask_extent[MAXDIM-1]; /* mask extent */ long mask_stride[MAXDIM-1]; /* mask stride */ long result_offset[MAXDIM-1]; /* result offset*/ long result_stride[MAXDIM-1]; /* result stride*/ long cdim_mask_stride; /* cdim stride */ /* Validate dimension variable */ if (dimension != NULL && mask->n_dim > 1) { c_dim = *dimension - 1; if (c_dim < 0 || c_dim >= mask->n_dim) _lerror (_LELVL_ABORT, FESCIDIM); } else { c_dim = 0; if (dimension != NULL) { if (*dimension < 1 || *dimension > mask->n_dim) _lerror (_LELVL_ABORT, FESCIDIM); } } /* Setup dope vector for result array */ if (!result->assoc) { int sm = 1; if (result->base_addr.a.el_len >= BITS_PER_WORD) sm = result->base_addr.a.el_len / BITS_PER_WORD; if (dimension != NULL) { for (i = 0; i < c_dim; i++) { result->dimension[i].extent = mask->dimension[i].extent; result->dimension[i].low_bound = 1; result->dimension[i].stride_mult = num_elts * sm; num_elts *= mask->dimension[i].extent; } for ( ; i < result->n_dim; i++) { result->dimension[i].extent = mask->dimension[i+1].extent; result->dimension[i].low_bound = 1; result->dimension[i].stride_mult = num_elts * sm; num_elts *= mask->dimension[i+1].extent; } } result->base_addr.a.ptr = (void *) NULL; nbytes = ((num_elts * result->base_addr.a.el_len) / BITS_PER_BYTE); if (nbytes != 0) { result->base_addr.a.ptr = (void *) malloc (nbytes); if (result->base_addr.a.ptr == NULL) _lerror(_LELVL_ABORT, FENOMEMY); result->assoc = 1; } /* set fields for null array as well */ result->orig_base = result->base_addr.a.ptr; result->orig_size = nbytes * BITS_PER_BYTE; } /* Set pointer to result array and initialize result array to TRUE */ irptr = (void *) result->base_addr.a.ptr; switch (result->type_lens.int_len) { case 64 : i8rptr = (_f_log8 *) result->base_addr.a.ptr; #ifdef _F_LOG4 if (sizeof(_f_int) == sizeof(_f_log4)) rshftct = 1; #endif #ifdef _UNICOS #pragma _CRI ivdep #endif for (i = 0; i < num_elts; i++) { i8rptr[i] = (_f_log8) _btol(1); } break; #ifdef _F_LOG2 case 16 : i2rptr = (_f_log2 *) result->base_addr.a.ptr; for (i = 0; i < num_elts; i++) { i2rptr[i] = (_f_log2) _btol(1); } break; #endif #ifdef _F_LOG1 case 8 : i1rptr = (_f_log1 *) result->base_addr.a.ptr; for (i = 0; i < num_elts; i++) { i1rptr[i] = (_f_log1) _btol(1); } break; #endif case 32 : default : i4rptr = (_f_log4 *) result->base_addr.a.ptr; #ifdef _UNICOS #pragma _CRI ivdep #endif for (i = 0; i < num_elts; i++) { i4rptr[i] = (_f_log4) _btol(1); } } imptr = (void *) mask->base_addr.a.ptr; switch (mask->type_lens.int_len) { case 64 : el_len = sizeof(_f_log8) * BITS_PER_BYTE; i8mptr = (_f_log8 *) imptr; #ifdef _F_LOG4 /* Set mask shftct for ALL with no size specified since * no size means a 64-bit logical value. A default of * 32-bit logical has a stride_mult of two for a 64-bit * logical on WORD32. Normally, the ALL_8 entry point * is used. On MPP, the stride_mult is one for 32-bit * or 64-bit logical. */ if (sizeof(_f_int) == sizeof(_f_log4)) mshftct = 1; #endif break; #ifdef _F_LOG2 case 16 : el_len = sizeof(_f_log2) * BITS_PER_BYTE; i2mptr = (_f_log2 *) imptr; break; #endif #ifdef _F_LOG1 case 8 : el_len = sizeof(_f_log1) * BITS_PER_BYTE; i1mptr = (_f_log1 *) imptr; break; #endif case 32 : default : el_len = sizeof(_f_log4) * BITS_PER_BYTE; i4mptr = (_f_log4 *) imptr; } /* check for zero-sized mask array */ for (i = 0; i < mask->n_dim; i++) { if (mask->dimension[i].extent == 0) return; } /* Handle a rank-one mask array */ if (mask->n_dim == 1) { /* Use local mask_stride and divide by two when two-word * logical is being done. */ #ifdef _F_LOG4 mask_stride[0] = (mask->dimension[0].stride_mult) >> mshftct; #else mask_stride[0] = mask->dimension[0].stride_mult; #endif /* Scan array until a FALSE element is found */ i = 0; indx = 0; switch (mask->type_lens.int_len) { case 64 : while (i < mask->dimension[0].extent) { if (LTOB(el_len, (i8mptr + indx))) { /* true element */ i++; indx = i * mask_stride[0]; } else { /* false element */ switch (result->type_lens.int_len) { case 64 : i8rptr[0] = (_f_log8) _btol(0); break; #ifdef _F_LOG2 case 16 : i2rptr[0] = (_f_log2) _btol(0); break; #endif #ifdef _F_LOG1 case 8 : i1rptr[0] = (_f_log1) _btol(0); break; #endif case 32 : default : i4rptr[0] = (_f_log4) _btol(0); } i = mask->dimension[0].extent; } } break; #ifdef _F_LOG2 case 16 : while (i < mask->dimension[0].extent) { if (LTOB(el_len, (i2mptr + indx))) { /* true element */ i++; indx = i * mask_stride[0]; } else { /* false element */ switch (result->type_lens.int_len) { case 64 : i8rptr[0] = (_f_log8) _btol(0); break; case 16 : i2rptr[0] = (_f_log2) _btol(0); break; #ifdef _F_LOG1 case 8 : i1rptr[0] = (_f_log1) _btol(0); break; #endif case 32 : default : i4rptr[0] = (_f_log4) _btol(0); } i = mask->dimension[0].extent; } } break; #endif #ifdef _F_LOG1 case 8 : while (i < mask->dimension[0].extent) { if (LTOB(el_len, (i1mptr + indx))) { /* true element */ i++; indx = i * mask_stride[0]; } else { /* false element */ switch (result->type_lens.int_len) { case 64 : i8rptr[0] = (_f_log8) _btol(0); break; case 16 : i2rptr[0] = (_f_log2) _btol(0); break; case 8 : i1rptr[0] = (_f_log1) _btol(0); break; case 32 : default : i4rptr[0] = (_f_log4) _btol(0); } i = mask->dimension[0].extent; } } break; #endif case 32 : default : while (i < mask->dimension[0].extent) { if (LTOB(el_len, (i4mptr + indx))) { /* true element */ i++; indx = i * mask_stride[0]; } else { /* false element */ switch (result->type_lens.int_len) { case 64 : i8rptr[0] = (_f_log8) _btol(0); break; #ifdef _F_LOG2 case 16 : i2rptr[0] = (_f_log2) _btol(0); break; #endif #ifdef _F_LOG1 case 8 : i1rptr[0] = (_f_log1) _btol(0); break; #endif case 32 : default : i4rptr[0] = (_f_log4) _btol(0); } i = mask->dimension[0].extent; } } } /* Handle a rank-two mask array */ } else if (mask->n_dim == 2) {
void _MERGE (DopeVectorType * result, DopeVectorType * tsource, DopeVectorType * fsource, DopeVectorType * mask) { char *cf; /* char ptr to fsource array */ char *ct; /* char ptr to tsource array */ char *cr; /* char ptr to result array */ int chrlenf; /* length from fsource */ int chrlenr; /* length from result */ char * restrict cptr1; /* char */ unsigned long * restrict uptr1; /* unsigned */ _f_real16 * restrict dptr1; /* double */ dblcmplx * restrict xptr1; /* double cmplx */ char * restrict cptr2; /* char */ unsigned long * restrict uptr2; /* unsigned */ _f_real16 * restrict dptr2; /* double */ dblcmplx * restrict xptr2; /* double cmplx */ char * restrict cptr3; /* char */ unsigned long * restrict uptr3; /* unsigned */ _f_real16 * restrict dptr3; /* double */ dblcmplx * restrict xptr3; /* double cmplx */ _f_int * restrict iptr4; /* int */ unsigned long * restrict fptr; /* fsource */ unsigned long * restrict rptr; /* result */ unsigned long * restrict tptr; /* tsource */ unsigned long * restrict mptr; /* mask */ int bucketsize; /* size of element */ int nbytes; /* number of bytes */ int nwords; /* number of words */ int curdim[7]; /* current indices */ int bytealligned; /* byte aligned flag */ int findx; /* fsource index */ int rindx; /* result index */ int mindx; /* mask index */ int tindx; /* tsource index */ int type; /* data type */ int rank; /* rank of result matrix */ int i, j, k; /* index variables */ int fls_ext[MAXDIM]; /* extents for fsource */ int fls_strd[MAXDIM]; /* element stride for field */ int fls_incr[MAXDIM]; /* incr for each index */ int msk_ext[MAXDIM]; /* extents for fsource */ int msk_strd[MAXDIM]; /* element stride for field */ int msk_incr[MAXDIM]; /* incr for each index */ int res_ext[MAXDIM]; /* extents for fsource */ int res_strd[MAXDIM]; /* element stride for field */ int res_incr[MAXDIM]; /* incr for each index */ int tru_ext[MAXDIM]; /* extents for fsource */ int tru_strd[MAXDIM]; /* element stride for field */ int tru_incr[MAXDIM]; /* incr for each index */ int fls_cum_decr; /* fsource cumulative decrement */ int msk_cum_decr; /* mask cumulative decrement */ int res_cum_decr; /* result cumulative decrement */ int tru_cum_decr; /* tsource cumulative decrement */ int tot_ext; /* total extent counter */ int msk_0_strd; /* scaler stride variable */ int res_0_strd; /* scaler stride variable */ int tru_0_strd; /* scaler stride variable */ int fls_0_strd; /* scaler stride variable */ int one; /* index holder */ int zero; /* index holder */ /* Set type and rank global variables */ type = fsource->type_lens.type; rank = mask->n_dim; /* * Initialize every element of every array to try and minimize problem * in compiler. */ for (i = 0; i < MAXDIM; i++) { fls_ext[i] = 0; fls_strd[i] = 0; fls_incr[i] = 0; msk_ext[i] = 0; msk_strd[i] = 0; msk_incr[i] = 0; res_ext[i] = 0; res_strd[i] = 0; res_incr[i] = 0; tru_ext[i] = 0; tru_strd[i] = 0; tru_incr[i] = 0; } /* Size calculation is based on variable type */ switch (type) { case DVTYPE_ASCII : bytealligned = 1; bucketsize = _fcdlen (fsource->base_addr.charptr); break; case DVTYPE_DERIVEDBYTE : bytealligned = 1; #ifndef _ADDR64 bucketsize = fsource->base_addr.a.el_len >> 3; /* bytes */ #else bucketsize = _fcdlen (fsource->base_addr.charptr);/* bytes */ #endif break; case DVTYPE_DERIVEDWORD : bytealligned = 0; #ifndef _ADDR64 bucketsize = fsource->base_addr.a.el_len >> 6; /* words */ #else bucketsize = _fcdlen (fsource->base_addr.charptr);/* bytes */ bucketsize >>= 3; /* words */ #endif break; default : bytealligned = 0; bucketsize = fsource->type_lens.int_len >> 6; /* words */ } /* Set up dope vector for result array */ if (!result->assoc) { result->base_addr.a.ptr = (void *) NULL; result->orig_base = 0; result->orig_size = 0; for (i = 0; i < rank; i++) { result->dimension[i].extent = mask->dimension[i].extent; result->dimension[i].low_bound = 1; result->dimension[i].stride_mult = mask->dimension[i].stride_mult * bucketsize; } /* Determine size of space to allocate */ if (!bytealligned) nbytes = bucketsize << 3; else nbytes = bucketsize; for (i = 0; i < rank; i++) nbytes *= mask->dimension[i].extent; nwords = nbytes >> 3; result->base_addr.a.ptr = (void *) malloc (nbytes); if (result->base_addr.a.ptr == NULL) _lerror (_LELVL_ABORT, FENOMEMY); result->assoc = 1; if (bytealligned) { cr = (char *) result->base_addr.a.ptr; result->base_addr.charptr = _cptofcd (cr, bucketsize); } result->orig_base = (void *) result->base_addr.a.ptr; result->orig_size = nwords; } else {
_f_real4 _NEAREST_4_8(_f_real4 x, _f_real8 s) { #ifdef KEY /* Bug 10771 */ if (s == (_f_real8) 0.0) { _lerror (_LELVL_ABORT, FENEARZS); } _f_int4 infinity = signbit(s) ? (0x80000000 | IEEE_32_INFINITY) : IEEE_32_INFINITY; _f_real4 result = nextafterf(x, * (_f_real4 *) &infinity); return result; #elif 0 /* KEY Bug 3399 */ /* See comment in _NEAREST_4 */ REGISTER_4 x_reg; int positive_s = (s > (_f_real8) 0.0); if (s == (_f_real8) 0.0) { _lerror (_LELVL_ABORT, FENEARZS); } x_reg.f = x; if (IEEE_32_EXPO_ALL_ONES(x_reg.ui)) { return x; } if (x == (_f_real4) 0.0) { /* either +0.0 or -0.0 */ x_reg.ui = positive_s ? 1 : (IEEE_32_SIGN_BIT | 1); } else { int increment = (positive_s == (x > (_f_real4) 0.0)) ? 1 : -1; x_reg.ui += increment; } return x_reg.f; #else REGISTER_4 s1, s2, s3; s1.f = x; if (s == 0.0) { _lerror (_LELVL_ABORT, FENEARZS); } #if defined (_CRAY1) && defined(_CRAYIEEE) s3.ui = s1.ui & ~(IEEE_64_SIGN_BIT); s2.ui = (s1.f > 0) ? LL_CONST(0x20000000) : -(LL_CONST(0x20000000)); if ((_f_real4) TINY_REAL4_F90 > s3.f) s1.f = 0.0; #else s2.ui = (s1.f > 0) ? 0x1 : -(0x1); #endif if (s1.f == 0.0) { s1.f = (s > 0.0) ? (_f_real4) TINY_REAL4_F90 : (_f_real4) -TINY_REAL4_F90; } else if (s > 0.0) { s1.ui += s2.ui; } else { s1.ui -= s2.ui; } #if defined (_CRAY1) && defined(_CRAYIEEE) if (isnormal64(s1.ui)) #else if (isnormal32(s1.ui)) #endif return s1.f; if (x > 1.0 || x < -1.0) return s1.f; return (0.0); #endif /* KEY */ }
void AQRECALL( _f_int *aqp, _f_int *status, _f_int *reqid, _f_int *reply) { AQFIL *f; struct fflistreq *base, *limit, *nxtq, *busy, *eptr; int i; int max; int inside; if (_numargs() < 4) _lerror(_LELVL_ABORT, FEARGCNT, "AQRECALL", _numargs(), "4"); f = (AQFIL *) *aqp; if (f == NULL || f->aq_chk != AQ_CHK) { *status = -FEAQBADH; /* file handle is not valid */ return; } base = f->aq_base; limit = f->aq_limit; max = limit - base; /* * Lock the tail of the queue so no one can stomp on the entry for * which we are searching. */ AQ_LOCK(aq_lkbusy); busy = f->aq_busy; /* * Examine the entire queue, regardless of pointers */ for (i = 0 ; i < max ; i++) { if ( f->aq_reqid[i] == *reqid ) { eptr = &base[i]; /* * Determine if the entry in question is * between busy and nxtq. */ nxtq = f->aq_nxtq; /* Grab stable copy */ inside = NO; if (busy < nxtq) { if (busy <= eptr && eptr < nxtq) inside = YES; } else { if (eptr < nxtq || busy <= eptr) inside = YES; } /* * Now pointing to entry in question. * Wait for its completion, _aqwait will set status * appropriately. If not between nxtq and busy, * assume that if we found it, it must be done. */ *status = IOCOMPLETE; if (inside) { INC_QP(eptr, limit, max); _aqwait(f, status, reply, eptr); } AQ_UNLOCK(aq_lkbusy); return; } } AQ_UNLOCK(aq_lkbusy); *status = NOTFOUND; return; }
void _UBOUND (DopeVectorType * result, DopeVectorType * source, _f_int * dimptr) { int rank; int numbytes; int *destarry; _f_int4 *resptr4; _f_int8 *resptr8; int loopj; /* If source is a pointer/allocatable array, it must be * associated/allocated. */ if (source->p_or_a && !source->assoc) _lerror (_LELVL_ABORT, FENMPTAR, "UBOUND"); /* target is rank-one array with extent source.n_dim */ rank = source->n_dim; /* If result array is not allocated */ if (!result->assoc) { result->base_addr.a.ptr = (void *) NULL; result->dimension[0].extent = rank; result->dimension[0].low_bound = 1; result->dimension[0].stride_mult = result->type_lens.int_len / (sizeof(_f_int) * BITS_PER_BYTE); numbytes = rank * BYTES_PER_WORD; /* allocate rank in bytes for temporary array */ destarry = (void *) malloc (numbytes); if (destarry == NULL) _lerror(_LELVL_ABORT, FENOMEMY); result->base_addr.a.ptr = (void *) destarry; result->assoc = 1; } if (result->type_lens.kind_or_star == 0) { if (result->type_lens.int_len == 64) { resptr8 = (_f_int8 *) result->base_addr.a.ptr; for (loopj = 0; loopj < rank; loopj++) if(source->dimension[loopj].extent != 0) resptr8[loopj] = (_f_int8) (source->dimension[loopj].low_bound + source->dimension[loopj].extent) - 1; else resptr8[loopj] = (_f_int8) 0; } else { resptr4 = (_f_int4 *) result->base_addr.a.ptr; for (loopj = 0; loopj < rank; loopj++) if(source->dimension[loopj].extent != 0) resptr4[loopj] = (_f_int4) (source->dimension[loopj].low_bound + source->dimension[loopj].extent) - 1; else resptr4[loopj] = (_f_int4) 0; } } else { if (result->type_lens.dec_len == 8) { resptr8 = (_f_int8 *) result->base_addr.a.ptr; for (loopj = 0; loopj < rank; loopj++) if(source->dimension[loopj].extent != 0) resptr8[loopj] = (_f_int8) (source->dimension[loopj].low_bound + source->dimension[loopj].extent) - 1; else resptr8[loopj] = (_f_int8) 0; } else if (result->type_lens.dec_len == 4) { resptr4 = (_f_int4 *) result->base_addr.a.ptr; for (loopj = 0; loopj < rank; loopj++) if(source->dimension[loopj].extent != 0) resptr4[loopj] = (_f_int4) (source->dimension[loopj].low_bound + source->dimension[loopj].extent) - 1; else resptr4[loopj] = (_f_int4) 0; } } }
void _PACK ( DopeVectorType * result, DopeVectorType * source, DopeVectorType * mask, DopeVectorType * vector) { char *cs; /* char ptr to source array */ char *cr; /* char ptr to result array */ char *cv; /* char ptr to vector array */ char * restrict cptr1; /* char */ char * restrict cptr2; /* char */ char * restrict cptr3; /* char */ _f_int8 * restrict uptr1; /* 64-bit */ _f_int8 * restrict uptr2; /* 64-bit */ _f_int8 * restrict uptr3; /* 64-bit */ _f_int * restrict fptr1; /* default-size */ _f_int * restrict fptr2; /* default-size */ _f_int * restrict fptr3; /* default-size */ _f_real16 * restrict dptr1; /* 128-bit */ _f_real16 * restrict dptr2; /* 128-bit */ _f_real16 * restrict dptr3; /* 128-bit */ #ifdef _F_COMP16 dblcmplx * restrict xptr1; /* 256-bit */ dblcmplx * restrict xptr2; /* 256-bit */ dblcmplx * restrict xptr3; /* 256-bit */ #endif _f_int4 * restrict hptr1; /* 32-bit */ _f_int4 * restrict hptr2; /* 32-bit */ _f_int4 * restrict hptr3; /* 32-bit */ _f_mask * restrict iptr4; /* def kind mask */ void * restrict sptr; /* ptr to source */ void * restrict rptr; /* ptr to result */ void * restrict mptr; /* ptr to mask */ void * restrict vptr; /* ptr to vector */ _f_int bucketsize; /* size of each data element */ long nbytes; /* # of bytes in data array */ long nwords; /* # of words in data array */ long curdim[MAXDIM]; /* current indices */ _f_int bytealligned; /* byte alligned flag */ long sindx; /* source index */ long rindx; /* result index */ long mindx; /* mask index */ long vindx; /* vector index */ _f_int type; /* type scalar */ _f_int subtype; /* sub-type */ _f_int arithmetic; /* arithmetic */ _f_int rank; /* dimension of source scalar */ long i, j, k; /* index variables */ long res_strd; /* element stride for result */ long vec_strd; /* element stride for result */ long src_ext[MAXDIM]; /* extents for source */ long src_strd[MAXDIM]; /* element stride for source */ long src_off[MAXDIM]; /* offset values for source */ long msk_strd[MAXDIM]; /* element stride for mask */ long msk_off[MAXDIM]; /* offset values for mask */ long indx1_src; /* index for dim 1 of source */ long indx2_src; /* index for dim 2 of source */ long indx1_vec; /* index for dim 1 of vector */ long indx2_vec; /* index for dim 2 of vector */ long indx1_res; /* index for dim 1 of result */ long indx2_res; /* index for dim 2 of result */ long indx1_msk; /* index for dim 1 of msk */ long indx2_msk; /* index for dim 2 of msk */ long total_ext; /* total extent counter */ long src_ext1; /* extent for dim 1 of source */ long src_ext2; /* extent for dim 1 of source */ long found; /* count of # entries in result */ long mask_el_len; _f_int early_exit; /* early exit flag */ /* Set type and dimension global variables */ type = source->type_lens.type; rank = source->n_dim; mask_el_len = mask->base_addr.a.el_len; /* * Check to see if any of the matrices have size 0. If any do, * return without doing anything. */ early_exit = 0; #ifdef _UNICOS #pragma _CRI shortloop #endif for (i = 0; i < rank; i++) { if (!source->dimension[i].extent) early_exit = 1; } if (result->assoc) { if (!result->dimension[0].extent) early_exit = 1; } if (vector) { if (!vector->dimension[0].extent) early_exit = 1; } if (mask) { if (mask->n_dim > 1) { #ifdef _UNICOS #pragma _CRI shortloop #endif for (i = 0; i < rank; i++) if (!mask->dimension[i].extent) early_exit = 1; } } /* * Initialize every array element to 0. */ #ifdef _UNICOS #pragma _CRI shortloop #endif for (i = 0; i < MAXDIM; i++) { curdim[i] = 0; src_ext[i] = 0; src_strd[i] = 0; src_off[i] = 0; msk_strd[i] = 0; msk_off[i] = 0; } /* Size calculation is based on variable type */ switch (type) { case DVTYPE_ASCII : bytealligned = 1; bucketsize = _fcdlen (source->base_addr.charptr); /* bytes */ subtype = DVSUBTYPE_CHAR; arithmetic = 0; break; case DVTYPE_DERIVEDBYTE : bytealligned = 1; bucketsize = source->base_addr.a.el_len / BITS_PER_BYTE; subtype = DVSUBTYPE_CHAR; arithmetic = 0; break; case DVTYPE_DERIVEDWORD : bytealligned = 0; bucketsize = source->base_addr.a.el_len / BITS_PER_WORD; subtype = DVSUBTYPE_DERIVED; arithmetic = 0; break; default : bytealligned = 0; bucketsize = source->type_lens.int_len / BITS_PER_WORD; if (source->type_lens.int_len == 64) { subtype = DVSUBTYPE_BIT64; } else if (source->type_lens.int_len == 32) { subtype = DVSUBTYPE_BIT32; bucketsize = 1; } else if (source->type_lens.int_len == 256) { subtype = DVSUBTYPE_BIT256; } else { subtype = DVSUBTYPE_BIT128; } arithmetic = 1; } /* If necessary, fill result dope vector */ if (!result->assoc) { result->base_addr.a.ptr = (void *) NULL; result->orig_base = 0; result->orig_size = 0; /* Determine size of space to allocate */ if (!bytealligned) { nbytes = bucketsize * BYTES_PER_WORD; #ifdef _CRAYMPP if (subtype == DVSUBTYPE_BIT32) nbytes /= 2; #endif } else { nbytes = bucketsize; } if (vector) { nbytes *= vector->dimension[0].extent; nwords = vector->dimension[0].extent; } else { #ifdef _UNICOS #pragma _CRI shortloop #endif for (i = 0; i < rank; i++) nbytes *= source->dimension[i].extent; nwords = nbytes / BYTES_PER_WORD; } if (nbytes > 0) { result->base_addr.a.ptr = (void *) malloc (nbytes); if (result->base_addr.a.ptr == NULL) _lerror (_LELVL_ABORT, FENOMEMY); } result->assoc = 1; result->base_addr.a.el_len = source->base_addr.a.el_len; if (type == DVTYPE_ASCII) { cr = (char *) result->base_addr.a.ptr; result->base_addr.charptr = _cptofcd (cr, bucketsize); } result->orig_size = nbytes * BITS_PER_BYTE; /* * These are initial values which may be changed when it is * determined how big the result array actually is. */ result->dimension[0].low_bound = 1; result->dimension[0].extent = nwords; result->dimension[0].stride_mult = bucketsize; /* if result array is already allocated */ } else { if (!bytealligned) nbytes = bucketsize * BYTES_PER_WORD; else nbytes = bucketsize; if (vector) { nbytes *= vector->dimension[0].extent; nwords = vector->dimension[0].extent; } else { nwords = 1; for (i = 0; i < rank; i++) { nbytes *= source->dimension[i].extent; nwords *= source->dimension[i].extent; } } } /* If early exit is required, exit now */ if (early_exit) return; if (mask) { iptr4 = (_f_mask *) mask->base_addr.a.ptr; if (mask->n_dim == 0 && !(vector) && !LTOB(mask_el_len, &iptr4[0])) { result->dimension[0].extent = 0; return; } } /* Set up scalar pointers to all of the argument data areas */ if (mask) mptr = (void *) mask->base_addr.a.ptr; if (!bytealligned) { sptr = (void *) source->base_addr.a.ptr; rptr = (void *) result->base_addr.a.ptr; if (vector) vptr = (void *) vector->base_addr.a.ptr; } else { if (type == DVTYPE_ASCII) { cs = _fcdtocp (source->base_addr.charptr); cr = _fcdtocp (result->base_addr.charptr); if (vector) cv = _fcdtocp (vector->base_addr.charptr); } else { cs = (char *) source->base_addr.a.ptr; cr = (char *) result->base_addr.a.ptr; if (vector) cv = (char *) vector->base_addr.a.ptr; } } /* Set up some 'shortcut' variables used for index calculation */ if (bucketsize > 1 && arithmetic) { res_strd = result->dimension[0].stride_mult / bucketsize; if (vector) vec_strd = vector->dimension[0].stride_mult / bucketsize; } else { res_strd = result->dimension[0].stride_mult; if (vector) vec_strd = vector->dimension[0].stride_mult; } #ifdef _UNICOS #pragma _CRI shortloop #endif for (i = 0; i < rank; i++) { src_ext[i] = source->dimension[i].extent; if (bucketsize > 1 && arithmetic) { src_strd[i] = source->dimension[i].stride_mult / bucketsize; } else { src_strd[i] = source->dimension[i].stride_mult; } } if (mask->n_dim > 0) { #ifdef _UNICOS #pragma _CRI shortloop #endif for (i = 0; i < rank; i++) { msk_strd[i] = mask->dimension[i].stride_mult; iptr4 = (_f_mask *) mptr; #ifdef _CRAYMPP if (mask_el_len == 64 && sizeof (iptr4[0]) == 4) msk_strd[i] <<= 1; #endif } } /* * The program is divided up into three blocks. The first block deals * with arrays of rank 1. Inside each block, the data types are broken * up into groups based on container size. Integer, real, and logical * types are all one word, and the actual value is not used, so they * are all grouped together and treated as long. The same is * true for double and complex, as well as ascii and derivedbyte. * * For each group, the mask array is checked for true values. When one * is encountered, the corresponding value from the source array is put * into the next available position in the result array. If no vector * is passed, the routine is finished at this point with the result * array length set to the number of true elements in the mask. If a * vector is furnished, the size of the vector determines the size of * the result array. If this size has been reached, the routine is done. * If not, elements from the vector array are put into the result array * until it is full. */ if (rank == 1) { found = 0; iptr4 = (_f_mask *) mptr; switch (subtype) { case DVSUBTYPE_BIT64 : uptr1 = (_f_int8 *) sptr; uptr2 = (_f_int8 *) vptr; uptr3 = (_f_int8 *) rptr; rindx = 0; mindx = 0; vindx = 0; sindx = 0; src_ext1 = source->dimension[0].extent; for (i = 0; i < src_ext1; i++) { if (LTOB(mask_el_len, &iptr4[mindx])) { sindx = i * src_strd[0]; uptr3[rindx] = uptr1[sindx]; rindx += res_strd; found++; } mindx += msk_strd[0]; } if (!vector || found == nwords) { result->dimension[0].extent = found; } else { vindx = found * vec_strd; for ( ; found < nwords; found++) { uptr3[rindx] = uptr2[vindx]; rindx += res_strd; vindx += vec_strd; } } break; case DVSUBTYPE_BIT32 : hptr1 = (_f_int4 *) sptr; hptr2 = (_f_int4 *) vptr; hptr3 = (_f_int4 *) rptr; rindx = 0; mindx = 0; vindx = 0; sindx = 0; src_ext1 = source->dimension[0].extent; for (i = 0; i < src_ext1; i++) { if (LTOB(mask_el_len, &iptr4[mindx])) { sindx = i * src_strd[0]; hptr3[rindx] = hptr1[sindx]; rindx += res_strd; found++; } mindx += msk_strd[0]; } if (!vector || found == nwords) { result->dimension[0].extent = found; } else { vindx = found * vec_strd; for ( ; found < nwords; found++) { hptr3[rindx] = hptr2[vindx]; rindx += res_strd; vindx += vec_strd; } } break; case DVSUBTYPE_BIT128 : dptr1 = (_f_real16 *) sptr; dptr2 = (_f_real16 *) vptr; dptr3 = (_f_real16 *) rptr; rindx = 0; mindx = 0; vindx = 0; sindx = 0; src_ext1 = source->dimension[0].extent; for (i = 0; i < src_ext1; i++) { if (LTOB(mask_el_len, &iptr4[mindx])) { sindx = i * src_strd[0]; dptr3[rindx] = dptr1[sindx]; rindx += res_strd; found++; } mindx += msk_strd[0]; } if (!vector || found == nwords) { result->dimension[0].extent = found; } else { vindx = found * vec_strd; for ( ; found < nwords; found++) { dptr3[rindx] = dptr2[vindx]; rindx += res_strd; vindx += vec_strd; } } break; case DVSUBTYPE_CHAR : rindx = 0; mindx = 0; vindx = 0; sindx = 0; src_ext1 = source->dimension[0].extent; for (i = 0; i < src_ext1; i++) { if (LTOB(mask_el_len, &iptr4[mindx])) { cptr3 = (char *) cr + rindx; cptr1 = (char *) cs + (i * src_strd[0]); (void) memcpy (cptr3, cptr1, bucketsize); rindx += res_strd; found++; } mindx += msk_strd[0]; } if (!vector || found == nwords) { result->dimension[0].extent = found; } else { vindx = found * vec_strd; for ( ; found < nwords; found++) { cptr3 = (char *) cr + rindx; cptr2 = (char *) cv + vindx; (void) memcpy (cptr3, cptr2, bucketsize); rindx += res_strd; vindx += vec_strd; } } break; case DVSUBTYPE_DERIVED : fptr1 = (_f_int *) sptr; fptr2 = (_f_int *) vptr; fptr3 = (_f_int *) rptr; src_ext1 = source->dimension[0].extent; indx1_res = 0; /* * The derived word type is handled the same as the other types except * that another loop is added. The assumption was made that extent of * the array would be larger than the number of words in the derived * type. Therefore, to try and make this routine optimal, the first * loop uses the extent as its inner loop, which should provide better * optimization. The second loop is also done this way. */ for (i = 0; i < bucketsize; i++) { rindx = i; mindx = 0; for (j = 0; j < src_ext1; j++) { if (LTOB(mask_el_len, &iptr4[mindx])) { sindx = i + (j * src_strd[0]); fptr3[rindx] = fptr1[sindx]; rindx += res_strd; if (i == 0) { indx1_res = rindx; found++; } } mindx += msk_strd[0]; } } if (!vector || found == nwords) { result->dimension[0].extent = found; } else { indx1_vec = found * vec_strd; found = nwords - found; for (i = 0; i < bucketsize; i++) { rindx = indx1_res + i; vindx = indx1_vec + i; for (j = 0; j < found; j++) { fptr3[rindx] = fptr2[vindx]; rindx += res_strd; vindx += vec_strd; } } } break; #ifdef _F_COMP16 case DVSUBTYPE_BIT256 : xptr1 = (dblcmplx *) sptr; xptr2 = (dblcmplx *) vptr; xptr3 = (dblcmplx *) rptr; rindx = 0; mindx = 0; vindx = 0; sindx = 0; src_ext1 = source->dimension[0].extent; for (i = 0; i < src_ext1; i++) { if (LTOB(mask_el_len, &iptr4[mindx])) { sindx = i * src_strd[0]; xptr3[rindx].re = xptr1[sindx].re; xptr3[rindx].im = xptr1[sindx].im; rindx += res_strd; found++; } mindx += msk_strd[0]; } if (!vector || found == nwords) { result->dimension[0].extent = found; } else { vindx = found * vec_strd; for ( ; found < nwords; found++) { xptr3[rindx].re = xptr2[vindx].re; xptr3[rindx].im = xptr2[vindx].im; rindx += res_strd; vindx += vec_strd; } } break; #endif default : _lerror (_LELVL_ABORT, FEINTDTY); } } else if (rank == 2) { /* * Rank 2 matrices are handled in a manner similar to rank 1 arrays, * except that the first loop in each data type is a nested loop, with * the outer loop being the second dimension, and the inner loop being * the first. This preserves the storage order which is necessary for * pack to work. The second part of each block is not affected by the * number of dimensions in the source matrix. */ found = 0; iptr4 = (_f_mask *) mptr; switch (subtype) { case DVSUBTYPE_BIT64 : uptr1 = (_f_int8 *) sptr; uptr2 = (_f_int8 *) vptr; uptr3 = (_f_int8 *) rptr; indx2_msk = 0; indx2_src = 0; rindx = 0; src_ext1 = src_ext[0]; src_ext2 = src_ext[1]; for (i = 0; i < src_ext2; i++) { indx1_msk = 0; for (j = 0; j < src_ext1; j++) { mindx = indx1_msk + indx2_msk; if (LTOB(mask_el_len, &iptr4[mindx])) { sindx = indx2_src + (j * src_strd[0]); uptr3[rindx] = uptr1[sindx]; rindx += res_strd; found++; } indx1_msk += msk_strd[0]; } indx2_msk += msk_strd[1]; indx2_src += src_strd[1]; } if (!vector || found == nwords) { result->dimension[0].extent = found; } else { vindx = found * vec_strd; for ( ; found < nwords; found++) { uptr3[rindx] = uptr2[vindx]; rindx += res_strd; vindx += vec_strd; } } break; case DVSUBTYPE_BIT32 : hptr1 = (_f_int4 *) sptr; hptr2 = (_f_int4 *) vptr; hptr3 = (_f_int4 *) rptr; indx2_msk = 0; indx2_src = 0; rindx = 0; src_ext1 = src_ext[0]; src_ext2 = src_ext[1]; for (i = 0; i < src_ext2; i++) { indx1_msk = 0; for (j = 0; j < src_ext1; j++) { mindx = indx1_msk + indx2_msk; if (LTOB(mask_el_len, &iptr4[mindx])) { sindx = indx2_src + (j * src_strd[0]); hptr3[rindx] = hptr1[sindx]; rindx += res_strd; found++; } indx1_msk += msk_strd[0]; } indx2_msk += msk_strd[1]; indx2_src += src_strd[1]; } if (!vector || found == nwords) { result->dimension[0].extent = found; } else { vindx = found * vec_strd; for ( ; found < nwords; found++) { hptr3[rindx] = hptr2[vindx]; rindx += res_strd; vindx += vec_strd; } } break; case DVSUBTYPE_BIT128 : dptr1 = (_f_real16 *) sptr; dptr2 = (_f_real16 *) vptr; dptr3 = (_f_real16 *) rptr; indx2_msk = 0; indx2_src = 0; rindx = 0; src_ext1 = src_ext[0]; src_ext2 = src_ext[1]; for (i = 0; i < src_ext2; i++) { indx1_msk = 0; for (j = 0; j < src_ext1; j++) { mindx = indx1_msk + indx2_msk; if (LTOB(mask_el_len, &iptr4[mindx])) { sindx = indx2_src + (j * src_strd[0]); dptr3[rindx] = dptr1[sindx]; rindx += res_strd; found++; } indx1_msk += msk_strd[0]; } indx2_msk += msk_strd[1]; indx2_src += src_strd[1]; } if (!vector || found == nwords) { result->dimension[0].extent = found; } else { vindx = found * vec_strd; for ( ; found < nwords; found++) { dptr3[rindx] = dptr2[vindx]; rindx += res_strd; vindx += vec_strd; } } break; case DVSUBTYPE_CHAR : indx2_msk = 0; indx2_src = 0; rindx = 0; src_ext1 = src_ext[0]; src_ext2 = src_ext[1]; for (i = 0; i < src_ext2; i++) { indx1_msk = 0; for (j = 0; j < src_ext1; j++) { mindx = indx1_msk + indx2_msk; if (LTOB(mask_el_len, &iptr4[mindx])) { sindx = indx2_src + (j * src_strd[0]); cptr1 = (char *) cs + sindx; cptr3 = (char *) cr + rindx; (void) memcpy (cptr3, cptr1, bucketsize); rindx += res_strd; found++; } indx1_msk += msk_strd[0]; } indx2_msk += msk_strd[1]; indx2_src += src_strd[1]; } if (!vector || found == nwords) { result->dimension[0].extent = found; } else { vindx = found * vec_strd; for ( ; found < nwords; found++) { cptr2 = (char *) cv + vindx; cptr3 = (char *) cr + rindx; (void) memcpy (cptr3, cptr2, bucketsize); rindx += res_strd; vindx += vec_strd; } } break; case DVSUBTYPE_DERIVED : fptr1 = (_f_int *) sptr; fptr2 = (_f_int *) vptr; fptr3 = (_f_int *) rptr; src_ext1 = src_ext[0]; src_ext2 = src_ext[1]; for (i = 0; i < bucketsize; i++) { indx2_msk = 0; indx2_src = 0; rindx = i; for (j = 0; j < src_ext2; j++) { indx1_msk = 0; for (k = 0; k < src_ext1; k++) { mindx = indx1_msk + indx2_msk; if (LTOB(mask_el_len, &iptr4[mindx])) { sindx = indx2_src + i + (k * src_strd[0]); fptr3[rindx] = fptr1[sindx]; rindx += res_strd; if (i == 0) found++; } indx1_msk += msk_strd[0]; } indx2_msk += msk_strd[1]; indx2_src += src_strd[1]; } } if (!vector || found == nwords) { result->dimension[0].extent = found; } else { indx1_res = found * res_strd; indx1_vec = found * vec_strd; found = nwords - found; for (i = 0; i < bucketsize; i++) { rindx = indx1_res + i; vindx = indx1_vec + i; for (j = 0; j < found; j++) { fptr3[rindx] = fptr2[vindx]; rindx += res_strd; vindx += vec_strd; } } } break; #ifdef _F_COMP16 case DVSUBTYPE_BIT256 : xptr1 = (dblcmplx *) sptr; xptr2 = (dblcmplx *) vptr; xptr3 = (dblcmplx *) rptr; indx2_msk = 0; indx2_src = 0; rindx = 0; src_ext1 = src_ext[0]; src_ext2 = src_ext[1]; for (i = 0; i < src_ext2; i++) { indx1_msk = 0; for (j = 0; j < src_ext1; j++) { mindx = indx1_msk + indx2_msk; if (LTOB(mask_el_len, &iptr4[mindx])) { sindx = indx2_src + (j * src_strd[0]); xptr3[rindx].re = xptr1[sindx].re; xptr3[rindx].im = xptr1[sindx].im; rindx += res_strd; found++; } indx1_msk += msk_strd[0]; } indx2_msk += msk_strd[1]; indx2_src += src_strd[1]; } if (!vector || found == nwords) { result->dimension[0].extent = found; } else { vindx = found * vec_strd; for ( ; found < nwords; found++) { xptr3[rindx].re = xptr2[vindx].re; xptr3[rindx].im = xptr2[vindx].im; rindx += res_strd; vindx += vec_strd; } } break; #endif default : _lerror (_LELVL_ABORT, FEINTDTY); } } else { /* rank 3-7 */ /* * Ranks 3 through 7 are all handled in this last block. It was assumed * that ranks 1 and 2 would account for the majority of calls to pack, * and that the remaining ranks could be done in one block. * * The logic behind these blocks is the same as for the other ranks. * The first part of the routine uses two nested loops, with the inner * loop being the first dimension, and the outer loop being the product * of all of the remaining dimensions. A array of counters keeps track * of the values for each of the dimensions. Two macros are used in * this block. INCREMENT are used to calculate the values of each of * the dimension counters, and to calculate the offsets into the array * for each index. FIND_INDX sums these offsets into one offset, which * is used for each iteration of the inner loop. As with the other two * blocks, the second part of each section is not affected by the number * of dimensions in the source matrix. * * Calculate the product of each of the dimensions 2-n. This is the * number of times the outer loop will be executed. Also, initialize * the offset and dimension counter arrays. */ total_ext = 1; #ifdef _UNICOS #pragma _CRI shortloop #endif for (i = 0; i < MAXDIM; i++) { curdim[i] = 0; msk_off[i] = 0; src_off[i] = 0; } #ifdef _UNICOS #pragma _CRI shortloop #endif for (i = 1; i < rank; i++) total_ext *= source->dimension[i].extent; iptr4 = (_f_mask *) mptr; found = 0; switch (subtype) { case DVSUBTYPE_BIT64 : uptr2 = (_f_int8 *) vptr; uptr3 = (_f_int8 *) rptr; rindx = 0; for (i = 0; i < total_ext; i++) { FIND_INDX(); uptr1 = (_f_int8 *) sptr + indx1_src; iptr4 = (_f_mask *) mptr + indx1_msk; for (j = 0; j < src_ext[0]; j++) { mindx = j * msk_strd[0]; if (LTOB(mask_el_len, &iptr4[mindx])) { sindx = j * src_strd[0]; uptr3[rindx] = uptr1[sindx]; rindx += res_strd; found++; } } INCREMENT(); } if (!vector || found == nwords) { result->dimension[0].extent = found; } else { vindx = found * vec_strd; for ( ; found < nwords; found++) { uptr3[rindx] = uptr2[vindx]; rindx += res_strd; vindx += vec_strd; } } break; case DVSUBTYPE_BIT32 : hptr2 = (_f_int4 *) vptr; hptr3 = (_f_int4 *) rptr; rindx = 0; for (i = 0; i < total_ext; i++) { FIND_INDX(); hptr1 = (_f_int4 *) sptr + indx1_src; iptr4 = (_f_mask *) mptr + indx1_msk; for (j = 0; j < src_ext[0]; j++) { mindx = j * msk_strd[0]; if (LTOB(mask_el_len, &iptr4[mindx])) { sindx = j * src_strd[0]; hptr3[rindx] = hptr1[sindx]; rindx += res_strd; found++; } } INCREMENT(); } if (!vector || found == nwords) { result->dimension[0].extent = found; } else { vindx = found * vec_strd; for ( ; found < nwords; found++) { hptr3[rindx] = hptr2[vindx]; rindx += res_strd; vindx += vec_strd; } } break; case DVSUBTYPE_BIT128 : dptr2 = (_f_real16 *) vptr; dptr3 = (_f_real16 *) rptr; rindx = 0; for (i = 0; i < total_ext; i++) { FIND_INDX(); dptr1 = (_f_real16 *) sptr + indx1_src; iptr4 = (_f_mask *) mptr + indx1_msk; for (j = 0; j < src_ext[0]; j++) { mindx = j * msk_strd[0]; if (LTOB(mask_el_len, &iptr4[mindx])) { sindx = j * src_strd[0]; dptr3[rindx] = dptr1[sindx]; rindx += res_strd; found++; } } INCREMENT(); } if (!vector || found == nwords) { result->dimension[0].extent = found; } else { vindx = found * vec_strd; for ( ; found < nwords; found++) { dptr3[rindx] = dptr2[vindx]; rindx += res_strd; vindx += vec_strd; } } break; case DVSUBTYPE_CHAR : cptr2 = (char *) vptr; cptr3 = (char *) rptr; rindx = 0; for (i = 0; i < total_ext; i++) { FIND_INDX(); iptr4 = (_f_mask *) mptr + indx1_msk; for (j = 0; j < src_ext[0]; j++) { mindx = j * msk_strd[0]; if (LTOB(mask_el_len, &iptr4[mindx])) { sindx = indx1_src + (j * src_strd[0]); cptr1 = (char *) cs + sindx; cptr3 = (char *) cr + rindx; (void) memcpy (cptr3, cptr1, bucketsize); rindx += res_strd; found++; } } INCREMENT(); } if (!vector || found == nwords) { result->dimension[0].extent = found; } else { vindx = found * vec_strd; for ( ; found < nwords; found++) { cptr3 = (char *) cr + rindx; cptr2 = (char *) cv + vindx; (void) memcpy (cptr3, cptr2, bucketsize); rindx += res_strd; vindx += vec_strd; } } break; case DVSUBTYPE_DERIVED : fptr2 = (_f_int *) vptr; fptr3 = (_f_int *) rptr; for (i = 0; i < bucketsize; i++) { rindx = i; for (j = 0; j < rank; j++) { msk_off[j] = 0; src_off[j] = 0; curdim[j] = 0; } for (j = 0; j < total_ext; j++) { FIND_INDX(); fptr1 = (_f_int *) sptr + i + indx1_src; iptr4 = (_f_mask *) mptr + indx1_msk; for (k = 0; k < src_ext[0]; k++) { mindx = k * msk_strd[0]; if (LTOB(mask_el_len, &iptr4[mindx])) { sindx = k * src_strd[0]; fptr3[rindx] = fptr1[sindx]; rindx += res_strd; if (i == 0) found++; } } INCREMENT(); } } if (!vector || found == nwords) { result->dimension[0].extent = found; } else { indx1_res = found * res_strd; indx1_vec = found * vec_strd; found = nwords - found; for (i = 0; i < bucketsize; i++) { rindx = indx1_res + i; vindx = indx1_vec + i; for (j = 0; j < found; j++) { fptr3[rindx] = fptr2[vindx]; rindx += res_strd; vindx += vec_strd; } } } break; #ifdef _F_COMP16 case DVSUBTYPE_BIT256 : xptr2 = (dblcmplx *) vptr; xptr3 = (dblcmplx *) rptr; rindx = 0; for (i = 0; i < total_ext; i++) { FIND_INDX(); xptr1 = (dblcmplx *) sptr + indx1_src; iptr4 = (_f_mask *) mptr + indx1_msk; for (j = 0; j < src_ext[0]; j++) { mindx = j * msk_strd[0]; if (LTOB(mask_el_len, &iptr4[mindx])) { sindx = j * src_strd[0]; xptr3[rindx].re = xptr1[sindx].re; xptr3[rindx].im = xptr1[sindx].im; rindx += res_strd; found++; } } INCREMENT(); } if (!vector || found == nwords) { result->dimension[0].extent = found; } else { vindx = found * vec_strd; for ( ; found < nwords; found++) { xptr3[rindx].re = xptr2[vindx].re; xptr3[rindx].im = xptr2[vindx].im; rindx += res_strd; vindx += vec_strd; } } if (!vector || found == nwords) { result->dimension[0].extent = found; } else { vindx = found * vec_strd; for ( ; found < nwords; found++) { xptr3[rindx].re = xptr2[vindx].re; xptr3[rindx].im = xptr2[vindx].im; rindx += res_strd; vindx += vec_strd; } } break; #endif default : _lerror (_LELVL_ABORT, FEINTDTY); } } }
void PACK( _f_int *p, _f_int *nbits, _f_int *u, _f_int *count ) { register int nb; register int ni; #ifdef _UNICOS if (_numargs() < 4) _lerror(_LELVL_ABORT, FEPCKARG); #endif nb = *nbits; ni = *count; if (nb < 0) _lerror(_LELVL_ABORT, FEPCKNEG); if (nb == 0) _lerror(_LELVL_ABORT, FEPCKPW2); if (ni > 0) { register short cpw; /* Chunks per word */ register short remr; /* Remainder */ register int i; register int items; /* Number of full-word items */ register long mask; /* Mask for each item */ register long word; /* Scratch word */ cpw = 64 / nb; /* Chunks per word */ remr = (ni * nb) & 077; items = ((ni * nb) + 63) / 64;/* Round up */ mask = (1 << nb) - 1; switch (nb) { case 32: #pragma _CRI ivdep for (i = 0; i < items; i++) { word = (*u++ & mask) << nb; word = word | (*u++ & mask); *p++ = word; } break; case 16: #pragma _CRI ivdep for (i = 0; i < items; i++) { word = (*u++ & mask); word = (word << nb) | (*u++ & mask); word = (word << nb) | (*u++ & mask); word = (word << nb) | (*u++ & mask); *p++ = word; } break; case 8: #pragma _CRI ivdep for (i = 0; i < items; i++) { word = (*u++ & mask); word = (word << nb) | (*u++ & mask); word = (word << nb) | (*u++ & mask); word = (word << nb) | (*u++ & mask); word = (word << nb) | (*u++ & mask); word = (word << nb) | (*u++ & mask); word = (word << nb) | (*u++ & mask); word = (word << nb) | (*u++ & mask); *p++ = word; } break; case 4: case 2: case 1: for (i = 0; i < items; i++) { register short j; word = 0; #pragma _CRI shortloop for (j = 0; j < cpw; j++) word = (word << nb) | (*u++ & mask); *p++ = word; } break; default: _lerror(_LELVL_ABORT, FEPCKPW2); break; } /* switch */ if (remr != 0) { p = p - 1; *p = *p & (-1 << (64 - remr)); } } return; }