void _CSHIFT( DopeVectorType *result, DopeVectorType *array, DopeVectorType *shift, i4 *dim) { char * result_p, * result_b ; char * array_p, * array_b ; char * shift_p, * shift_b ; size_t src_extent [MAX_NARY_DIMS] ; size_t src_stride [MAX_NARY_DIMS] ; size_t src_offset [MAX_NARY_DIMS] ; size_t counter[MAX_NARY_DIMS] ; size_t res_stride [MAX_NARY_DIMS] ; size_t res_extent [MAX_NARY_DIMS] ; size_t res_offset [MAX_NARY_DIMS] ; int32_t j,ii; char *rp, *ap ; int32_t res_rank ; int32_t shf_rank ; int32_t src_rank = GET_RANK_FROM_DESC(array) - 1; size_t typ_sz = GET_ELEMENT_SZ_FROM_DESC(array); size_t a_size,a_stride,r_stride, i,k ; int8_t zero_szd_source = FALSE; int8_t byte_aligned = FALSE; size_t a_offs,a_bump,r_bump ; size_t ll1,ll2; int64_t shft,shf_typ_sz ; char *rp1, *ap1, *ap2 ; int32_t ddim ; size_t shf_stride [MAX_NARY_DIMS] ; size_t shf_offset [MAX_NARY_DIMS] ; size_t num_trues ; int32_t local_alloc ; size_t tot_ext ; size_t str_sz ; size_t src_size ; size_t res_sz; size_t xfer_sz; size_t tot_sz; int8_t computed_shift = FALSE ; ddim = (*dim) - 1 ; if ((ddim > src_rank) || (ddim < 0)) ERROR(_LELVL_ABORT,FESCIDIM); src_extent[0] = GET_EXTENT_FROM_DESC(array,ddim) ; src_stride[0] = GET_STRIDE_FROM_DESC(array,ddim) ; byte_aligned = GET_BYTEALIGNED_FROM_DESC(array) ; for ( j = 0, k = 1 ; j <= src_rank ; j ++ ) { if (j != ddim ) { src_extent[k] = GET_EXTENT_FROM_DESC(array,j) ; src_stride[k] = GET_STRIDE_FROM_DESC(array,j) ; src_offset[k-1] = src_stride[k] - (src_stride [k-1] * (src_extent[k-1])) ; k++ ; } counter[j] = 0 ; shf_offset[j] = 0 ; zero_szd_source = zero_szd_source || (src_extent[j] == 0) ; } if (!GET_ASSOCIATED_FROM_DESC(result)) { size_t nbytes ; size_t ext ; char *p ; SET_ADDRESS_IN_DESC(result,NULL); SET_ORIG_BS_IN_DESC(result,NULL) ; SET_ORIG_SZ_IN_DESC(result,0) ; p = NULL ; tot_ext = 1 ; nbytes = typ_sz ; str_sz = MK_STRIDE(byte_aligned,typ_sz); for ( i = 0 ; i <= src_rank ; i ++) { ext = GET_EXTENT_FROM_DESC(array,i) ; 
SET_LBOUND_IN_DESC(result,i,1); SET_EXTENT_IN_DESC(result,i,ext); SET_STRMULT_IN_DESC(result,i,tot_ext * str_sz ); tot_ext *= ext; nbytes *= ext; } if (nbytes > 0) { p = (void *) malloc (nbytes); if (p == NULL) ERROR(_LELVL_ABORT, FENOMEMY); SET_ADDRESS_IN_DESC(result,p); } SET_ASSOCIATED_IN_DESC(result); SET_CONTIG_IN_DESC(result); if (GET_DV_ASCII_FROM_DESC(array)) { SET_CHARPTR_IN_DESC(result,p,typ_sz); } SET_ORIG_BS_IN_DESC(result,p) ; SET_ORIG_SZ_IN_DESC(result,nbytes * 8) ; } res_stride[0] = GET_STRIDE_FROM_DESC(result,ddim) ; for ( j = 0, k = 1 ; j <= src_rank ; j ++ ) { if (j != ddim ) { res_stride[k] = GET_STRIDE_FROM_DESC(result,j) ; res_offset[k-1] = res_stride[k] - (res_stride [k-1] * (src_extent[k-1])) ; k++ ; } } shf_typ_sz = GET_ELEMENT_SZ_FROM_DESC(shift); shf_rank = GET_RANK_FROM_DESC(shift); shift_p = GET_ADDRESS_FROM_DESC(shift); shf_stride[0] = 0 ; for ( j = 0 ; j < shf_rank ; j ++ ) { shf_stride[j] = GET_STRIDE_FROM_DESC(shift,j) ; } for ( j = 1 ; j < shf_rank ; j ++ ) { shf_offset[j] = shf_stride[j] - (shf_stride [j-1] * (src_extent[j])) ; } a_bump = src_extent[0] * src_stride[0] ; r_bump = src_extent[0] * res_stride[0] ; if (zero_szd_source) return ; a_size = src_extent[0] ; a_stride = src_stride[0] ; r_stride = res_stride[0] ; array_p = GET_ADDRESS_FROM_DESC(array); result_p = GET_ADDRESS_FROM_DESC(result); if (typ_sz == sizeof(i1) && ALIGNED_i1(array_p) && ALIGNED_i1(result_p)) { while (counter[src_rank] < src_extent[src_rank] ) { if (!computed_shift) { switch (shf_typ_sz) { case sizeof(i1) : shft = * (i1 *)shift_p ; break ; case sizeof(i2) : shft = * (i2 *)shift_p ; break ; case sizeof(i4) : shft = * (i4 *)shift_p ; break ; case sizeof(i8) : shft = * (i8 *)shift_p ; break ; } shft = shft % (int64_t)src_extent[0]; if (shft < 0 ) { ll1 = abs(shft) ; ll2 = (int64_t)src_extent[0] - abs(shft) ; } else { ll1 = (int64_t)src_extent[0] - shft ; ll2 = shft ; } a_offs = a_stride * ll2 ; shift_p += shf_stride[0] ; if (shf_rank == 0) computed_shift = 
TRUE; } ap1 = array_p + a_offs ; for ( k = 0 ; k < ll1 ; k ++ ) { *(i1 *)result_p = *(i1 *)ap1 ; result_p += r_stride ; ap1 += a_stride ; } ap2 = array_p ; for ( k = 0 ; k < ll2 ; k ++ ) { *(i1 *)result_p = *(i1 *)ap2 ; result_p += r_stride ; ap2 += a_stride ; } array_p += a_bump ; counter[0] = a_size ; j = 0 ; while ((counter[j] == src_extent[j]) && (j < src_rank)) { array_p += src_offset[j] ; result_p += res_offset[j] ; shift_p += shf_offset[j] ; counter[j+1]++ ; counter[j] = 0 ; j ++ ; } } } else if (typ_sz == sizeof(i2) && ALIGNED_i2(array_p) && ALIGNED_i2(result_p) ) { while (counter[src_rank] < src_extent[src_rank] ) { if (!computed_shift) { switch (shf_typ_sz) { case sizeof(i1) : shft = * (i1 *)shift_p ; break ; case sizeof(i2) : shft = * (i2 *)shift_p ; break ; case sizeof(i4) : shft = * (i4 *)shift_p ; break ; case sizeof(i8) : shft = * (i8 *)shift_p ; break ; } shft = shft % (int64_t)src_extent[0]; if (shft < 0 ) { ll1 = abs(shft) ; ll2 = (int64_t)src_extent[0] - abs(shft) ; } else { ll1 = (int64_t)src_extent[0] - shft ; ll2 = shft ; } a_offs = a_stride * ll2 ; shift_p += shf_stride[0] ; if (shf_rank == 0) computed_shift = TRUE; } ap1 = array_p + a_offs ; for ( k = 0 ; k < ll1 ; k ++ ) { *(i2 *)result_p = *(i2 *)ap1 ; result_p += r_stride ; ap1 += a_stride ; } ap2 = array_p ; for ( k = 0 ; k < ll2 ; k ++ ) { *(i2 *)result_p = *(i2 *)ap2 ; result_p += r_stride ; ap2 += a_stride ; } array_p += a_bump ; counter[0] = a_size ; j = 0 ; while ((counter[j] == src_extent[j]) && (j < src_rank)) { array_p += src_offset[j] ; result_p += res_offset[j] ; shift_p += shf_offset[j] ; counter[j+1]++ ; counter[j] = 0 ; j ++ ; } } } else if (typ_sz == sizeof(r4) && ALIGNED_r4(array_p) && ALIGNED_r4(result_p) ) { while (counter[src_rank] < src_extent[src_rank] ) { if (!computed_shift) { switch (shf_typ_sz) { case sizeof(i1) : shft = * (i1 *)shift_p ; break ; case sizeof(i2) : shft = * (i2 *)shift_p ; break ; case sizeof(i4) : shft = * (i4 *)shift_p ; break ; case sizeof(i8) : 
shft = * (i8 *)shift_p ; break ; } shft = shft % (int64_t)src_extent[0]; if (shft < 0 ) { ll1 = abs(shft) ; ll2 = (int64_t)src_extent[0] - abs(shft) ; } else { ll1 = (int64_t)src_extent[0] - shft ; ll2 = shft ; } a_offs = a_stride * ll2 ; shift_p += shf_stride[0] ; if (shf_rank == 0) computed_shift = TRUE; } ap1 = array_p + a_offs ; for ( k = 0 ; k < ll1 ; k ++ ) { *(r4 *)result_p = *(r4 *)ap1 ; result_p += r_stride ; ap1 += a_stride ; } ap2 = array_p ; for ( k = 0 ; k < ll2 ; k ++ ) { *(r4 *)result_p = *(r4 *)ap2 ; result_p += r_stride ; ap2 += a_stride ; } array_p += a_bump ; counter[0] = a_size ; j = 0 ; while ((counter[j] == src_extent[j]) && (j < src_rank)) { array_p += src_offset[j] ; result_p += res_offset[j] ; shift_p += shf_offset[j] ; counter[j+1]++ ; counter[j] = 0 ; j ++ ; } } } else if (typ_sz == sizeof(r8) && ALIGNED_r8(array_p) && ALIGNED_r8(result_p) ) { while (counter[src_rank] < src_extent[src_rank] ) { if (!computed_shift) { switch (shf_typ_sz) { case sizeof(i1) : shft = * (i1 *)shift_p ; break ; case sizeof(i2) : shft = * (i2 *)shift_p ; break ; case sizeof(i4) : shft = * (i4 *)shift_p ; break ; case sizeof(i8) : shft = * (i8 *)shift_p ; break ; } shft = shft % (int64_t)src_extent[0]; if (shft < 0 ) { ll1 = abs(shft) ; ll2 = (int64_t)src_extent[0] - abs(shft) ; } else { ll1 = (int64_t)src_extent[0] - shft ; ll2 = shft ; } a_offs = a_stride * ll2 ; shift_p += shf_stride[0] ; if (shf_rank == 0) computed_shift = TRUE; } ap1 = array_p + a_offs ; for ( k = 0 ; k < ll1 ; k ++ ) { *(r8 *)result_p = *(r8 *)ap1 ; result_p += r_stride ; ap1 += a_stride ; } ap2 = array_p ; for ( k = 0 ; k < ll2 ; k ++ ) { *(r8 *)result_p = *(r8 *)ap2 ; result_p += r_stride ; ap2 += a_stride ; } array_p += a_bump ; counter[0] = a_size ; j = 0 ; while ((counter[j] == src_extent[j]) && (j < src_rank)) { array_p += src_offset[j] ; result_p += res_offset[j] ; shift_p += shf_offset[j] ; counter[j+1]++ ; counter[j] = 0 ; j ++ ; } } } else if (typ_sz == sizeof(r16) && 
ALIGNED_r16(array_p) && ALIGNED_r16(result_p) ) { while (counter[src_rank] < src_extent[src_rank] ) { if (!computed_shift) { switch (shf_typ_sz) { case sizeof(i1) : shft = * (i1 *)shift_p ; break ; case sizeof(i2) : shft = * (i2 *)shift_p ; break ; case sizeof(i4) : shft = * (i4 *)shift_p ; break ; case sizeof(i8) : shft = * (i8 *)shift_p ; break ; } shft = shft % (int64_t)src_extent[0]; if (shft < 0 ) { ll1 = abs(shft) ; ll2 = (int64_t)src_extent[0] - abs(shft) ; } else { ll1 = (int64_t)src_extent[0] - shft ; ll2 = shft ; } a_offs = a_stride * ll2 ; shift_p += shf_stride[0] ; if (shf_rank == 0) computed_shift = TRUE; } ap1 = array_p + a_offs ; for ( k = 0 ; k < ll1 ; k ++ ) { *(r16 *)result_p = *(r16 *)ap1 ; result_p += r_stride ; ap1 += a_stride ; } ap2 = array_p ; for ( k = 0 ; k < ll2 ; k ++ ) { *(r16 *)result_p = *(r16 *)ap2 ; result_p += r_stride ; ap2 += a_stride ; } array_p += a_bump ; counter[0] = a_size ; j = 0 ; while ((counter[j] == src_extent[j]) && (j < src_rank)) { array_p += src_offset[j] ; result_p += res_offset[j] ; shift_p += shf_offset[j] ; counter[j+1]++ ; counter[j] = 0 ; j ++ ; } } } else { while (counter[src_rank] < src_extent[src_rank] ) { if (!computed_shift) { switch (shf_typ_sz) { case sizeof(i1) : shft = * (i1 *)shift_p ; break ; case sizeof(i2) : shft = * (i2 *)shift_p ; break ; case sizeof(i4) : shft = * (i4 *)shift_p ; break ; case sizeof(i8) : shft = * (i8 *)shift_p ; break ; } shft = shft % (int64_t)src_extent[0]; if (shft < 0 ) { ll1 = abs(shft) ; ll2 = (int64_t)src_extent[0] - abs(shft) ; } else { ll1 = (int64_t)src_extent[0] - shft ; ll2 = shft ; } a_offs = a_stride * ll2 ; shift_p += shf_stride[0] ; if (shf_rank == 0) computed_shift = TRUE; } ap1 = array_p + a_offs ; for ( k = 0 ; k < ll1 ; k ++ ) { rp = result_p ; ap = ap1 ; if (typ_sz > BIGDEFAULTSZ) (void) memcpy (rp, ap, typ_sz); else for (j = 0 ; j < typ_sz ; j ++) *rp++ = *ap ++ ; result_p += r_stride ; ap1 += a_stride ; } ap2 = array_p ; for ( k = 0 ; k < ll2 ; k ++ ) { 
rp = result_p ; ap = ap2 ; if (typ_sz > BIGDEFAULTSZ) (void) memcpy (rp, ap, typ_sz); else for (j = 0 ; j < typ_sz ; j ++) *rp++ = *ap ++ ; result_p += r_stride ; ap2 += a_stride ; } array_p += a_bump ; counter[0] = a_size ; j = 0 ; while ((counter[j] == src_extent[j]) && (j < src_rank)) { array_p += src_offset[j] ; result_p += res_offset[j] ; shift_p += shf_offset[j] ; counter[j+1]++ ; counter[j] = 0 ; j ++ ; } } } }
/*
 * Read "count" integers from dimension 0 of the descriptor "dv" into
 * "dst", converting from the descriptor's element size (1, 2, 4 or 8
 * bytes) to int32_t.  An element size that is none of those stores 0
 * instead of leaving the slot indeterminate.
 */
static void
reshape_load_ints(DopeVectorType *dv, int32_t *dst, int32_t count)
{
	size_t	step = GET_STRIDE_FROM_DESC(dv,0);
	size_t	size = GET_ELEMENT_SZ_FROM_DESC(dv);
	char	*p = GET_ADDRESS_FROM_DESC(dv);
	int32_t	n;

	for (n = 0; n < count; n++) {
		switch (size) {
		case 1:
			dst[n] = *(int8_t *) p;
			break;
		case 2:
			dst[n] = *(int16_t *) p;
			break;
		case 4:
			dst[n] = *(int32_t *) p;
			break;
		case 8:
			dst[n] = (int32_t) *(int64_t *) p;
			break;
		default:
			dst[n] = 0;
			break;
		}
		p += step;
	}
}

/*
 * _RESHAPE: fill "result", whose extents are given by "shape", with the
 * elements of "array" taken in storage order (dimension 0 fastest),
 * followed by the elements of "pad" repeated as often as needed.
 *
 *   result  Result descriptor.  When not yet associated it is given
 *           rank = number of shape entries, the shape's extents, and
 *           malloc()ed storage.
 *   array   Source descriptor.
 *   shape   Integer vector of result extents.  An empty vector raises
 *           FESHPSZZ; a negative entry raises FERSHNEG.
 *   pad     Optional (may be NULL) descriptor of filler elements.
 *   order   Optional (may be NULL) integer vector; entry j names the
 *           1-based result dimension that varies j-th fastest while the
 *           result is filled.  It must be a permutation of 1..rank,
 *           otherwise FEBDORDR is raised.
 *
 * FERSHNPD is raised when the shape asks for more elements than "array"
 * holds and "pad" is absent or has a zero extent.  Nothing is copied when
 * any shape entry is zero.
 */
void
_RESHAPE(
	DopeVectorType	*result,
	DopeVectorType	*array,
	DopeVectorType	*shape,
	DopeVectorType	*pad,
	DopeVectorType	*order)
{
	enum { XFER_I1, XFER_I2, XFER_R4, XFER_R8, XFER_R16, XFER_BYTES } xfer;
	char	*result_p;
	char	*array_p;
	char	*pad_p = NULL;	/* must be NULL, not garbage, without pad */
	size_t	src_extent[MAX_NARY_DIMS];
	size_t	src_stride[MAX_NARY_DIMS];
	size_t	src_offset[MAX_NARY_DIMS];
	size_t	counter[MAX_NARY_DIMS];
	size_t	res_stride[MAX_NARY_DIMS];
	size_t	res_extent[MAX_NARY_DIMS];
	size_t	res_offset[MAX_NARY_DIMS];
	size_t	res_counter[MAX_NARY_DIMS];
	size_t	pad_stride[MAX_NARY_DIMS];
	size_t	pad_extent[MAX_NARY_DIMS];
	size_t	pad_offset[MAX_NARY_DIMS] = {0};
	int32_t	l_order[MAX_NARY_DIMS];
	int32_t	l_order_chk[MAX_NARY_DIMS];
	int32_t	l_shape[MAX_NARY_DIMS];
	int32_t	j, ii;
	int32_t	pad_rank = -1;
	int32_t	res_rank;
	int32_t	src_rank = GET_RANK_FROM_DESC(array) - 1;
	size_t	typ_sz = GET_ELEMENT_SZ_FROM_DESC(array);
	size_t	a_size, a_stride, r_stride;
	size_t	i;
	size_t	tot_shape, tot_source;
	size_t	tot_ext;
	size_t	str_sz;
	int8_t	zero_szd_shape = FALSE;
	int8_t	zero_szd_order = FALSE;
	int8_t	zero_szd_source = FALSE;
	int8_t	zero_szd_pad = FALSE;
	int8_t	byte_aligned;

	/* Source geometry and element count. */
	tot_source = 1;
	for (j = 0; j <= src_rank; j++) {
		src_extent[j] = GET_EXTENT_FROM_DESC(array,j);
		src_stride[j] = GET_STRIDE_FROM_DESC(array,j);
		counter[j] = 0;
		zero_szd_source = zero_szd_source || (src_extent[j] == 0);
		tot_source *= src_extent[j];
	}
	for (j = 1; j <= src_rank; j++)
		src_offset[j-1] = src_stride[j] -
			(src_stride[j-1] * src_extent[j-1]);

	byte_aligned = GET_BYTEALIGNED_FROM_DESC(array);

	/*
	 * The number of shape entries is the rank of the result.
	 * NOTE(review): res_rank is not checked against MAX_NARY_DIMS
	 * before it is used to index the local arrays -- confirm that
	 * callers guarantee the bound.
	 */
	res_rank = GET_EXTENT_FROM_DESC(shape,0);
	if (res_rank == 0)
		ERROR(_LELVL_ABORT, FESHPSZZ);

	reshape_load_ints(shape, l_shape, res_rank);

	tot_shape = 1;
	for (j = 0; j < res_rank; j++) {
		if (l_shape[j] < 0)
			ERROR (_LELVL_ABORT, FERSHNEG);
		zero_szd_shape = zero_szd_shape || (l_shape[j] == 0);
		tot_shape *= l_shape[j];
	}

	/* Fill order: identity when absent, else a checked permutation. */
	if (order == NULL) {
		for (j = 0; j < res_rank; j++)
			l_order[j] = j + 1;
	} else {
		reshape_load_ints(order, l_order, res_rank);
		for (j = 0; j < res_rank; j++)
			l_order_chk[j] = FALSE;

		for (j = 0; j < res_rank; j++) {
			if (l_order[j] <= 0 || l_order[j] > res_rank)
				ERROR(_LELVL_ABORT, FEBDORDR);
			else
				l_order_chk[l_order[j]-1] = TRUE;
			zero_szd_order = zero_szd_order || (l_order[j] == 0);
		}
		for (j = 0; j < res_rank; j++) {
			if (!l_order_chk[j])
				ERROR(_LELVL_ABORT, FEBDORDR);
		}
	}

	/* Pad geometry, when a pad was supplied. */
	if (pad != NULL) {
		pad_p = GET_ADDRESS_FROM_DESC(pad);
		pad_rank = GET_RANK_FROM_DESC(pad) - 1;
		for (j = 0; j <= pad_rank; j++) {
			pad_extent[j] = GET_EXTENT_FROM_DESC(pad,j);
			pad_stride[j] = GET_STRIDE_FROM_DESC(pad,j);
			zero_szd_pad = zero_szd_pad || (pad_extent[j] == 0);
		}
		for (j = 1; j <= pad_rank; j++)
			pad_offset[j-1] = pad_stride[j] -
				(pad_stride[j-1] * pad_extent[j-1]);
	}

	/*
	 * A pad that is missing or empty cannot make up a shortfall.  The
	 * empty-pad case has to be rejected here as well: the fill loop
	 * below would otherwise keep restarting an empty pad forever.
	 */
	if ((pad == NULL || zero_szd_pad) && tot_shape > tot_source)
		ERROR(_LELVL_ABORT, FERSHNPD);

	/* Allocate and describe the result if the caller did not. */
	if (!GET_ASSOCIATED_FROM_DESC(result)) {
		size_t	nbytes;
		char	*p;

		SET_ADDRESS_IN_DESC(result,NULL);
		SET_ORIG_BS_IN_DESC(result,NULL);
		SET_ORIG_SZ_IN_DESC(result,0);
		SET_RANK_IN_DESC(result,res_rank);

		p = NULL;
		tot_ext = 1;
		nbytes = typ_sz;
		str_sz = MK_STRIDE(byte_aligned,typ_sz);

		for (j = 0; j < res_rank; j++) {
			SET_LBOUND_IN_DESC(result,j,1);
			SET_EXTENT_IN_DESC(result,j,l_shape[j]);
			SET_STRMULT_IN_DESC(result,j,tot_ext * str_sz);
			tot_ext *= l_shape[j];
			nbytes *= l_shape[j];
		}

		if (nbytes > 0 && !zero_szd_order) {
			p = malloc(nbytes);
			if (p == NULL)
				ERROR(_LELVL_ABORT, FENOMEMY);
			SET_ADDRESS_IN_DESC(result,p);
		}

		SET_ASSOCIATED_IN_DESC(result);
		SET_CONTIG_IN_DESC(result);
		SET_ALEN_IN_DESC(result,GET_ALEN_FROM_DESC(array));
		if (GET_DV_ASCII_FROM_DESC(array)) {
			SET_CHARPTR_IN_DESC(result,p,typ_sz);
		}
		SET_ORIG_BS_IN_DESC(result,p);
		SET_ORIG_SZ_IN_DESC(result,nbytes*8);
	}

	if (zero_szd_shape || zero_szd_order)
		return;

	/*
	 * Result geometry in fill order: slot j describes the result
	 * dimension named by the j-th order entry.
	 */
	for (j = 0; j < res_rank; j++) {
		ii = l_order[j] - 1;
		res_stride[j] = GET_STRIDE_FROM_DESC(result,ii);
		res_extent[j] = GET_EXTENT_FROM_DESC(result,ii);
		res_counter[j] = 0;
	}
	for (j = 1; j < res_rank; j++)
		res_offset[j-1] = res_stride[j] -
			(res_stride[j-1] * res_extent[j-1]);

	res_rank--;		/* from here on: index of the last slot */

	/*
	 * An empty source contributes nothing: with a pad, make every
	 * source extent zero so the fill loop moves straight on to the
	 * pad; without one there is nothing to copy.
	 */
	if (zero_szd_source) {
		if (pad == NULL)
			return;
		for (j = 0; j <= src_rank; j++)
			src_extent[j] = 0;
	}

	a_size = src_extent[0];
	a_stride = src_stride[0];
	r_stride = res_stride[0];
	array_p = GET_ADDRESS_FROM_DESC(array);
	result_p = GET_ADDRESS_FROM_DESC(result);

	/*
	 * Pick the widest element move that the size and the alignment of
	 * source, result and (if present) pad allow.
	 */
	if (typ_sz == sizeof(i1) && ALIGNED_i1(array_p) &&
	    ALIGNED_i1(result_p) &&
	    ((pad_p == NULL) || ALIGNED_i1(pad_p)))
		xfer = XFER_I1;
	else if (typ_sz == sizeof(i2) && ALIGNED_i2(array_p) &&
	    ALIGNED_i2(result_p) &&
	    ((pad_p == NULL) || ALIGNED_i2(pad_p)))
		xfer = XFER_I2;
	else if (typ_sz == sizeof(r4) && ALIGNED_r4(array_p) &&
	    ALIGNED_r4(result_p) &&
	    ((pad_p == NULL) || ALIGNED_r4(pad_p)))
		xfer = XFER_R4;
	else if (typ_sz == sizeof(r8) && ALIGNED_r8(array_p) &&
	    ALIGNED_r8(result_p) &&
	    ((pad_p == NULL) || ALIGNED_r8(pad_p)))
		xfer = XFER_R8;
	else if (typ_sz == sizeof(r16) && ALIGNED_r16(array_p) &&
	    ALIGNED_r16(result_p) &&
	    ((pad_p == NULL) || ALIGNED_r16(pad_p)))
		xfer = XFER_R16;
	else
		xfer = XFER_BYTES;

	/*
	 * Each pass of the outer loop drains the current source (first
	 * "array", afterwards "pad" again and again).  The function
	 * returns from inside as soon as the last result slot wraps.
	 */
	for (;;) {
		while (counter[src_rank] < src_extent[src_rank]) {
			for (i = 0; i < a_size; i++) {
				switch (xfer) {
				case XFER_I1:
					*(i1 *) result_p = *(i1 *) array_p;
					break;
				case XFER_I2:
					*(i2 *) result_p = *(i2 *) array_p;
					break;
				case XFER_R4:
					*(r4 *) result_p = *(r4 *) array_p;
					break;
				case XFER_R8:
					*(r8 *) result_p = *(r8 *) array_p;
					break;
				case XFER_R16:
					*(r16 *) result_p = *(r16 *) array_p;
					break;
				case XFER_BYTES:
				default:
					if (typ_sz > BIGDEFAULTSZ) {
						(void) memcpy(result_p,
						    array_p, typ_sz);
					} else {
						size_t	b;

						for (b = 0; b < typ_sz; b++)
							result_p[b] =
							    array_p[b];
					}
					break;
				}
				array_p += a_stride;
				result_p += r_stride;

				/* Advance the result odometer. */
				res_counter[0]++;
				for (j = 0; res_counter[j] == res_extent[j];
				    j++) {
					if (j == res_rank)
						return;	/* result is full */
					result_p += res_offset[j];
					res_counter[j+1]++;
					res_counter[j] = 0;
				}
			}

			/* Advance the source odometer by one whole row. */
			counter[0] = a_size;
			for (j = 0;
			    j < src_rank && counter[j] == src_extent[j];
			    j++) {
				array_p += src_offset[j];
				counter[j+1]++;
				counter[j] = 0;
			}
		}

		/* Source drained with nothing usable to continue from. */
		if (pad == NULL || zero_szd_pad)
			return;

		/* (Re)start the pad as the current source. */
		src_rank = pad_rank;
		for (j = 0; j <= src_rank; j++) {
			src_extent[j] = pad_extent[j];
			src_stride[j] = pad_stride[j];
			src_offset[j] = pad_offset[j];
			counter[j] = 0;
		}
		array_p = pad_p;
		a_size = src_extent[0];
		a_stride = src_stride[0];
	}
}
/*
 * Gather the array section described by "array" into the contiguous
 * buffer "result".  Elements land back to back, one element size apart,
 * while the section is walked with dimension 0 varying fastest.  Nothing
 * is copied when any extent of the section is zero.  The loop structure
 * is modeled after "pack".
 */

/* Move one row of row_len elements of type T, advancing both cursors. */
#define COPYIN_ROW(T)							\
	for (n = 0; n < row_len; n++) {					\
		*(T *) dst = *(T *) src;				\
		dst += dst_step;					\
		src += src_step;					\
	}

void
_Copyin(
	void		*result,
	DopeVectorType	*array)
{
	enum { VIA_I1, VIA_I2, VIA_UI4, VIA_UI8, VIA_UI16, VIA_BYTES } how;
	char	*dst = (char *) result;
	char	*src;
	size_t	extent[MAX_NARY_DIMS];
	size_t	stride[MAX_NARY_DIMS];
	size_t	carry[MAX_NARY_DIMS];	/* end of dim d -> next index of d+1 */
	size_t	done[MAX_NARY_DIMS];	/* per-dimension progress */
	size_t	n;
	size_t	b;
	size_t	row_len;
	size_t	src_step;
	size_t	dst_step;
	int32_t	d;
	int32_t	last = GET_RANK_FROM_DESC(array) - 1;
	size_t	elem_sz = GET_ELEMENT_SZ_FROM_DESC(array);
	int8_t	byte_aligned = GET_BYTEALIGNED_FROM_DESC(array);
	int8_t	is_empty = FALSE;

	(void) byte_aligned;	/* fetched from the descriptor, not used */

	for (d = 0; d <= last; d++) {
		extent[d] = GET_EXTENT_FROM_DESC(array,d);
		stride[d] = GET_STRIDE_FROM_DESC(array,d);
		done[d] = 0;
		if (extent[d] == 0)
			is_empty = TRUE;
	}
	for (d = 0; d < last; d++)
		carry[d] = stride[d + 1] - stride[d] * extent[d];

	if (is_empty)
		return;

	row_len = extent[0];
	src_step = stride[0];
	dst_step = elem_sz;
	src = GET_ADDRESS_FROM_DESC(array);

	/* Choose the element move once, from size and alignment. */
	if (elem_sz == sizeof(i1) && ALIGNED_i1(src) && ALIGNED_i1(dst))
		how = VIA_I1;
	else if (elem_sz == sizeof(i2) && ALIGNED_i2(src) && ALIGNED_i2(dst))
		how = VIA_I2;
	else if (elem_sz == sizeof(r4) && ALIGNED_r4(src) && ALIGNED_r4(dst))
		how = VIA_UI4;
	else if (elem_sz == sizeof(r8) && ALIGNED_r8(src) && ALIGNED_r8(dst))
		how = VIA_UI8;
	else if (elem_sz == sizeof(r16) && ALIGNED_r16(src) &&
	    ALIGNED_r16(dst))
		how = VIA_UI16;
	else
		how = VIA_BYTES;

	/* One pass per row along dimension 0. */
	while (done[last] < extent[last]) {
		switch (how) {
		case VIA_I1:
			COPYIN_ROW(i1);
			break;
		case VIA_I2:
			COPYIN_ROW(i2);
			break;
		case VIA_UI4:
			COPYIN_ROW(ui4);
			break;
		case VIA_UI8:
			COPYIN_ROW(ui8);
			break;
		case VIA_UI16:
			COPYIN_ROW(ui16);
			break;
		case VIA_BYTES:
		default:
			for (n = 0; n < row_len; n++) {
				if (elem_sz > BIGDEFAULTSZ) {
					(void) memcpy (dst, src, elem_sz);
				} else {
					for (b = 0; b < elem_sz; b++)
						dst[b] = src[b];
				}
				dst += dst_step;
				src += src_step;
			}
			break;
		}

		/*
		 * The row is finished; carry into the outer dimensions.
		 * The last dimension is never reset, which ends the loop.
		 */
		done[0] = row_len;
		for (d = 0; d < last && done[d] == extent[d]; d++) {
			src += carry[d];
			done[d + 1]++;
			done[d] = 0;
		}
	}
}

#undef COPYIN_ROW
/*
 * Scatter the contiguous buffer "source" into the array section
 * described by "dest".  Elements are read back to back, one element size
 * apart, while the section is walked with dimension 0 varying fastest.
 * Nothing is copied when any extent of the section is zero.  The loop
 * structure is modeled after "pack".
 */

/* Move one row of row_len elements of type T, advancing both cursors. */
#define COPYOUT_ROW(T)							\
	for (n = 0; n < row_len; n++) {					\
		*(T *) dst = *(T *) src;				\
		src += src_step;					\
		dst += dst_step;					\
	}

void
_Copyout(
	DopeVectorType	*dest,
	void		*source)
{
	enum { VIA_I1, VIA_I2, VIA_UI4, VIA_UI8, VIA_UI16, VIA_BYTES } how;
	char	*src = (char *) source;
	char	*dst;
	size_t	extent[MAX_NARY_DIMS];
	size_t	stride[MAX_NARY_DIMS];
	size_t	carry[MAX_NARY_DIMS];	/* end of dim d -> next index of d+1 */
	size_t	done[MAX_NARY_DIMS];	/* per-dimension progress */
	size_t	n;
	size_t	b;
	size_t	row_len;
	size_t	src_step;
	size_t	dst_step;
	int32_t	d;
	int32_t	last = GET_RANK_FROM_DESC(dest) - 1;
	size_t	elem_sz = GET_ELEMENT_SZ_FROM_DESC(dest);
	int8_t	byte_aligned = GET_BYTEALIGNED_FROM_DESC(dest);
	int8_t	is_empty = FALSE;

	(void) byte_aligned;	/* fetched from the descriptor, not used */

	for (d = 0; d <= last; d++) {
		extent[d] = GET_EXTENT_FROM_DESC(dest,d);
		stride[d] = GET_STRIDE_FROM_DESC(dest,d);
		done[d] = 0;
		if (extent[d] == 0)
			is_empty = TRUE;
	}
	for (d = 0; d < last; d++)
		carry[d] = stride[d + 1] - stride[d] * extent[d];

	if (is_empty)
		return;

	row_len = extent[0];
	dst_step = stride[0];
	src_step = elem_sz;
	dst = GET_ADDRESS_FROM_DESC(dest);

	/* Choose the element move once, from size and alignment. */
	if (elem_sz == sizeof(i1) && ALIGNED_i1(dst) && ALIGNED_i1(src))
		how = VIA_I1;
	else if (elem_sz == sizeof(i2) && ALIGNED_i2(dst) && ALIGNED_i2(src))
		how = VIA_I2;
	else if (elem_sz == sizeof(r4) && ALIGNED_r4(dst) && ALIGNED_r4(src))
		how = VIA_UI4;
	else if (elem_sz == sizeof(r8) && ALIGNED_r8(dst) && ALIGNED_r8(src))
		how = VIA_UI8;
	else if (elem_sz == sizeof(r16) && ALIGNED_r16(dst) &&
	    ALIGNED_r16(src))
		how = VIA_UI16;
	else
		how = VIA_BYTES;

	/* One pass per row along dimension 0. */
	while (done[last] < extent[last]) {
		switch (how) {
		case VIA_I1:
			COPYOUT_ROW(i1);
			break;
		case VIA_I2:
			COPYOUT_ROW(i2);
			break;
		case VIA_UI4:
			COPYOUT_ROW(ui4);
			break;
		case VIA_UI8:
			COPYOUT_ROW(ui8);
			break;
		case VIA_UI16:
			COPYOUT_ROW(ui16);
			break;
		case VIA_BYTES:
		default:
			for (n = 0; n < row_len; n++) {
				if (elem_sz > BIGDEFAULTSZ) {
					(void) memcpy (dst, src, elem_sz);
				} else {
					for (b = 0; b < elem_sz; b++)
						dst[b] = src[b];
				}
				src += src_step;
				dst += dst_step;
			}
			break;
		}

		/*
		 * The row is finished; carry into the outer dimensions.
		 * The last dimension is never reset, which ends the loop.
		 */
		done[0] = row_len;
		for (d = 0; d < last && done[d] == extent[d]; d++) {
			dst += carry[d];
			done[d + 1]++;
			done[d] = 0;
		}
	}
}

#undef COPYOUT_ROW
/*
 * Replicate "array" along a new dimension of "result".
 *
 * result:  dope vector of the result.  When it is not yet associated, its
 *          bounds, extents and stride multipliers are filled in here and
 *          storage is obtained with malloc (none when the result is empty).
 * array:   dope vector of the source; a rank-0 source is treated as a
 *          single element.
 * dim:     1-based position of the new dimension in the result; must lie
 *          in 1 .. rank(array)+1, otherwise ERROR(_LELVL_ABORT,FESCIDIM).
 * ncopies: number of replicas along the new dimension; negative values are
 *          treated as zero.
 *
 * The result descriptor is still set up when the source has a zero extent,
 * but no elements are then transferred.
 */
void
_SPREAD(DopeVectorType *result, DopeVectorType *array, i4 *dim, i4 *ncopies)
{
    /* How a single element is moved; picked once from size and alignment. */
    enum { MOVE_BYTES, MOVE_I1, MOVE_I2, MOVE_R4, MOVE_R8, MOVE_R16 } move;

#ifdef KEY /* bug 8062, Bug 4039 */
    /* Using IEEE FP on non-FP data might change bits during assign */
    typedef ui4  xfer4_t;
    typedef ui8  xfer8_t;
    typedef ui16 xfer16_t;
#else /* KEY bug 8062, Bug 4039 */
    typedef r4   xfer4_t;
    typedef r8   xfer8_t;
    typedef r16  xfer16_t;
#endif /* KEY bug 8062, Bug 4039 */

    char    *result_p;
    char    *array_p;
    char    *rp;
    char    *ap;
    char    *rp1;
    size_t  src_extent[MAX_NARY_DIMS];
    size_t  src_stride[MAX_NARY_DIMS];
    size_t  src_offset[MAX_NARY_DIMS];
    size_t  counter[MAX_NARY_DIMS];
    size_t  res_stride[MAX_NARY_DIMS];
    size_t  res_extent[MAX_NARY_DIMS];
    size_t  res_offset[MAX_NARY_DIMS];
    size_t  a_size;
    size_t  a_stride;
    size_t  r_stride;
    size_t  c_stride;
    size_t  i;
    size_t  k;
    size_t  b;
    size_t  tot_ext;
    size_t  str_sz;
    int32_t j;
    int32_t res_rank;
    int32_t ddim;
    int32_t nc;
    int32_t src_rank = GET_RANK_FROM_DESC(array) - 1;
    size_t  typ_sz = GET_ELEMENT_SZ_FROM_DESC(array);
    int8_t  zero_szd_source = FALSE;
    int8_t  byte_aligned = FALSE;

    ddim = (*dim) - 1;
    if ((ddim > src_rank + 1) || (ddim < 0))
        ERROR(_LELVL_ABORT,FESCIDIM);

    nc = *ncopies;
    if (nc < 0)
        nc = 0;

    /* Defaults describe a rank-0 source as one element with no stride. */
    src_extent[0] = 1;
    src_stride[0] = 0;
    src_offset[0] = 0;

    for (j = 0; j <= src_rank; j++) {
        src_extent[j] = GET_EXTENT_FROM_DESC(array,j);
        src_stride[j] = GET_STRIDE_FROM_DESC(array,j);
        /*
         * Record empty sources.  Without this the guard further down never
         * fires, and a zero extent in a middle dimension keeps the odometer
         * below from ever reaching its end.
         */
        zero_szd_source = zero_szd_source || (src_extent[j] == 0);
    }

    for (j = 1; j <= src_rank; j++)
        src_offset[j-1] = src_stride[j] -
            (src_stride[j-1] * src_extent[j-1]);

    res_rank = src_rank + 2;
    if (src_rank < 0)
        src_rank = 0;

    byte_aligned = GET_BYTEALIGNED_FROM_DESC(result);

    if (!GET_ASSOCIATED_FROM_DESC(result)) {
        size_t  nbytes;
        char    *p;

        SET_ADDRESS_IN_DESC(result,NULL);
        SET_ORIG_BS_IN_DESC(result,NULL);
        SET_ORIG_SZ_IN_DESC(result,0);

        p = NULL;
        tot_ext = 1;
        nbytes = typ_sz;
        str_sz = MK_STRIDE(byte_aligned,typ_sz);

        /* Result extents are the source extents with nc inserted at ddim. */
        for (i = 0, j = 0; i < res_rank; i++) {
            size_t ex;

            SET_LBOUND_IN_DESC(result,i,1);
            if (i != ddim) {
                ex = src_extent[j];
                j++;
            } else {
                ex = nc;
            }
            SET_EXTENT_IN_DESC(result,i,ex);
            SET_STRMULT_IN_DESC(result,i,tot_ext * str_sz);
            tot_ext *= ex;
            nbytes *= ex;
        }

        if (nbytes > 0) {
            p = malloc(nbytes);
            if (p == NULL)
                ERROR(_LELVL_ABORT, FENOMEMY);
            SET_ADDRESS_IN_DESC(result,p);
        }

        SET_CONTIG_IN_DESC(result);
        SET_ASSOCIATED_IN_DESC(result);
        if (GET_DV_ASCII_FROM_DESC(array)) {
            SET_CHARPTR_IN_DESC(result,p,typ_sz);
        }
        SET_ORIG_BS_IN_DESC(result,p);
        SET_ORIG_SZ_IN_DESC(result,nbytes * 8);
    }

    /*
     * Permute the result dimensions so that the ones matching the source
     * come first, in order, and the replicated dimension comes last.
     */
    res_stride[res_rank-1] = GET_STRIDE_FROM_DESC(result,ddim);
    res_extent[res_rank-1] = GET_EXTENT_FROM_DESC(result,ddim);
    for (j = 0, k = 0; j < res_rank; j++) {
        if (j != ddim) {
            res_stride[k] = GET_STRIDE_FROM_DESC(result,j);
            res_extent[k] = GET_EXTENT_FROM_DESC(result,j);
            k++;
        }
        counter[j] = 0;
    }

    for (j = 1; j < res_rank; j++)
        res_offset[j-1] = res_stride[j] -
            (res_stride[j-1] * res_extent[j-1]);

    if (zero_szd_source)
        return;

    a_size = src_extent[0];
    a_stride = src_stride[0];
    r_stride = res_stride[0];
    c_stride = res_stride[res_rank-1];   /* step between replicas */

    array_p = GET_ADDRESS_FROM_DESC(array);
    result_p = GET_ADDRESS_FROM_DESC(result);

    if (typ_sz == sizeof(i1) && ALIGNED_i1(array_p) && ALIGNED_i1(result_p))
        move = MOVE_I1;
    else if (typ_sz == sizeof(i2) && ALIGNED_i2(array_p) &&
             ALIGNED_i2(result_p))
        move = MOVE_I2;
    else if (typ_sz == sizeof(r4) && ALIGNED_r4(array_p) &&
             ALIGNED_r4(result_p))
        move = MOVE_R4;
    else if (typ_sz == sizeof(r8) && ALIGNED_r8(array_p) &&
             ALIGNED_r8(result_p))
        move = MOVE_R8;
    else if (typ_sz == sizeof(r16) && ALIGNED_r16(array_p) &&
             ALIGNED_r16(result_p))
        move = MOVE_R16;
    else
        move = MOVE_BYTES;

    /*
     * Each pass handles one run along source dimension 0: every source
     * element is written nc times, c_stride apart, then the odometer over
     * the remaining source dimensions is stepped.
     */
    while (counter[src_rank] < src_extent[src_rank]) {
        switch (move) {
        case MOVE_I1:
            for (i = 0; i < a_size; i++) {
                rp1 = result_p;
                for (k = 0; k < nc; k++) {
                    *(i1 *) rp1 = *(i1 *) array_p;
                    rp1 += c_stride;
                }
                result_p += r_stride;
                array_p += a_stride;
            }
            break;

        case MOVE_I2:
            for (i = 0; i < a_size; i++) {
                rp1 = result_p;
                for (k = 0; k < nc; k++) {
                    *(i2 *) rp1 = *(i2 *) array_p;
                    rp1 += c_stride;
                }
                result_p += r_stride;
                array_p += a_stride;
            }
            break;

        case MOVE_R4:
            for (i = 0; i < a_size; i++) {
                rp1 = result_p;
                for (k = 0; k < nc; k++) {
                    *(xfer4_t *) rp1 = *(xfer4_t *) array_p;
                    rp1 += c_stride;
                }
                result_p += r_stride;
                array_p += a_stride;
            }
            break;

        case MOVE_R8:
            for (i = 0; i < a_size; i++) {
                rp1 = result_p;
                for (k = 0; k < nc; k++) {
                    *(xfer8_t *) rp1 = *(xfer8_t *) array_p;
                    rp1 += c_stride;
                }
                result_p += r_stride;
                array_p += a_stride;
            }
            break;

        case MOVE_R16:
            for (i = 0; i < a_size; i++) {
                rp1 = result_p;
                for (k = 0; k < nc; k++) {
                    *(xfer16_t *) rp1 = *(xfer16_t *) array_p;
                    rp1 += c_stride;
                }
                result_p += r_stride;
                array_p += a_stride;
            }
            break;

        case MOVE_BYTES:
        default:
            /* Odd size or unaligned data: move it byte-wise. */
            for (i = 0; i < a_size; i++) {
                rp1 = result_p;
                for (k = 0; k < nc; k++) {
                    if (typ_sz > BIGDEFAULTSZ) {
                        (void) memcpy(rp1, array_p, typ_sz);
                    } else {
                        rp = rp1;
                        ap = array_p;
                        for (b = 0; b < typ_sz; b++)
                            *rp++ = *ap++;
                    }
                    rp1 += c_stride;
                }
                result_p += r_stride;
                array_p += a_stride;
            }
            break;
        }

        /* Dimension 0 is exhausted; carry into the higher dimensions. */
        counter[0] = a_size;
        for (j = 0; (j < src_rank) && (counter[j] == src_extent[j]); j++) {
            array_p += src_offset[j];
            result_p += res_offset[j];
            counter[j+1]++;
            counter[j] = 0;
        }
    }
}
void _EOSHIFT( DopeVectorType *result, DopeVectorType *array, DopeVectorType *shift, DopeVectorType *boundary, i4 *dim) { char * result_p, * result_b ; char * array_p, * array_b ; char * shift_p, * shift_b ; char * boundary_p, * boundary_b ; size_t src_extent [MAX_NARY_DIMS] ; size_t src_stride [MAX_NARY_DIMS] ; size_t src_offset [MAX_NARY_DIMS] ; size_t counter[MAX_NARY_DIMS] ; size_t res_stride [MAX_NARY_DIMS] ; size_t res_extent [MAX_NARY_DIMS] ; size_t res_offset [MAX_NARY_DIMS] ; int32_t j,ii; char *rp, *ap ; int32_t res_rank ; int32_t shf_rank ; int32_t src_rank = GET_RANK_FROM_DESC(array) - 1; size_t typ_sz = GET_ELEMENT_SZ_FROM_DESC(array); size_t a_size,a_stride,r_stride, i,k ; int8_t zero_szd_source = FALSE; int8_t byte_aligned = FALSE; size_t r_offs1,r_offs2 ; size_t a_offs,a_bump,r_bump ; size_t ll1,ll2; int64_t shft,shf_typ_sz ; char *rp1, *ap1, *ap2 ; int32_t ddim ; size_t shf_stride [MAX_NARY_DIMS] ; size_t shf_offset [MAX_NARY_DIMS] ; size_t fill_stride [MAX_NARY_DIMS] ; size_t fill_offset [MAX_NARY_DIMS] ; size_t f_stride ; int32_t fill_rank ; size_t num_trues ; int32_t local_alloc ; size_t tot_ext ; size_t str_sz ; size_t src_size ; size_t res_sz; size_t xfer_sz; size_t tot_sz; int8_t computed_shift = FALSE ; const r16 zero_fill = 0; const char zero_sp[17] = " " ; ddim = (*dim) - 1 ; if ((ddim > src_rank) || (ddim < 0)) ERROR(_LELVL_ABORT,FESCIDIM); src_extent[0] = GET_EXTENT_FROM_DESC(array,ddim) ; src_stride[0] = GET_STRIDE_FROM_DESC(array,ddim) ; byte_aligned = GET_BYTEALIGNED_FROM_DESC(array) ; for ( j = 0, k = 1 ; j <= src_rank ; j ++ ) { if (j != ddim ) { src_extent[k] = GET_EXTENT_FROM_DESC(array,j) ; src_stride[k] = GET_STRIDE_FROM_DESC(array,j) ; src_offset[k-1] = src_stride[k] - (src_stride [k-1] * (src_extent[k-1])) ; k++ ; } counter[j] = 0 ; fill_offset[j] = 0 ; shf_offset[j] = 0 ; zero_szd_source = zero_szd_source || (src_extent[j] == 0) ; } if (!GET_ASSOCIATED_FROM_DESC(result)) { size_t nbytes ; size_t ext ; char *p ; 
SET_ADDRESS_IN_DESC(result,NULL); SET_ORIG_BS_IN_DESC(result,NULL) ; SET_ORIG_SZ_IN_DESC(result,0) ; p = NULL ; tot_ext = 1 ; nbytes = typ_sz ; str_sz = MK_STRIDE(byte_aligned,typ_sz); for ( i = 0 ; i <= src_rank ; i ++) { ext = GET_EXTENT_FROM_DESC(array,i) ; SET_LBOUND_IN_DESC(result,i,1); SET_EXTENT_IN_DESC(result,i,ext); SET_STRMULT_IN_DESC(result,i,tot_ext * str_sz ); tot_ext *= ext; nbytes *= ext; } if (nbytes > 0) { p = (void *) malloc (nbytes); if (p == NULL) ERROR(_LELVL_ABORT, FENOMEMY); SET_ADDRESS_IN_DESC(result,p); } SET_ASSOCIATED_IN_DESC(result); SET_CONTIG_IN_DESC(result); if (GET_DV_ASCII_FROM_DESC(array)) { SET_CHARPTR_IN_DESC(result,p,typ_sz); } SET_ORIG_BS_IN_DESC(result,p) ; SET_ORIG_SZ_IN_DESC(result,nbytes * 8 ) ; } res_stride[0] = GET_STRIDE_FROM_DESC(result,ddim) ; for ( j = 0, k = 1 ; j <= src_rank ; j ++ ) { if (j != ddim ) { res_stride[k] = GET_STRIDE_FROM_DESC(result,j) ; res_offset[k-1] = res_stride[k] - (res_stride [k-1] * (src_extent[k-1])) ; k++ ; } } shf_typ_sz = GET_ELEMENT_SZ_FROM_DESC(shift); shf_rank = GET_RANK_FROM_DESC(shift); shift_p = GET_ADDRESS_FROM_DESC(shift); shf_stride[0] = 0 ; for ( j = 0 ; j < shf_rank ; j ++ ) { shf_stride[j] = GET_STRIDE_FROM_DESC(shift,j) ; } for ( j = 1 ; j < shf_rank ; j ++ ) { shf_offset[j] = shf_stride[j] - (shf_stride [j-1] * (src_extent[j])) ; } if (boundary != NULL) { boundary_p = GET_ADDRESS_FROM_DESC(boundary); fill_rank = GET_RANK_FROM_DESC(boundary) ; fill_stride[0] = 0; for ( j = 0 ; j < fill_rank ; j ++ ) { fill_stride[j] = GET_STRIDE_FROM_DESC(boundary,j) ; } fill_offset[0] = 0 ; for ( j = 1 ; j < fill_rank ; j ++ ) { fill_offset[j] = fill_stride[j] - (fill_stride [j-1] * (src_extent[j])) ; } } else { for ( j = 0 ; j <= src_rank ; j ++ ) { fill_stride[j] = 0 ; fill_offset[0] = 0 ; } if (GET_DV_ASCII_FROM_DESC(array)) boundary_p = (char *) & zero_sp; else boundary_p = (char *) & zero_fill; } a_bump = src_extent[0] * src_stride[0] ; r_bump = src_extent[0] * res_stride[0] ; if 
(zero_szd_source) return ; a_size = src_extent[0] ; a_stride = src_stride[0] ; r_stride = res_stride[0] ; f_stride = fill_stride[0] ; array_p = GET_ADDRESS_FROM_DESC(array); result_p = GET_ADDRESS_FROM_DESC(result); if (typ_sz == sizeof(i1) && ALIGNED_i1(array_p) && ALIGNED_i1(result_p) && ALIGNED_i1(boundary_p)) { while (counter[src_rank] < src_extent[src_rank] ) { i1 lfill = 0 ; if (!computed_shift) { switch (shf_typ_sz) { case sizeof(i1): shft = * (i1 *)shift_p ; break ; case sizeof(i2): shft = * (i2 *)shift_p ; break ; case sizeof(i4): shft = * (i4 *)shift_p ; break ; case sizeof(i8): shft = * (i8 *)shift_p ; break ; } shift_p += shf_stride[0]; if (shft < 0 ) { if (shft < (-(int64_t) src_extent[0])) shft = (-(int64_t) src_extent[0]); ll1 = ((int64_t)src_extent[0]) - abs(shft) ; ll2 = abs(shft) ; r_offs1 = res_stride[0] * ll2 ; r_offs2 = 0 ; a_offs = 0 ; } else { if (shft > (int64_t)src_extent[0]) shft = (int64_t)src_extent[0] ; ll1 = ((int64_t)src_extent[0]) - shft ; ll2 = shft ; r_offs1 = 0 ; r_offs2 = res_stride[0] * ll1 ; a_offs = a_stride * ll2 ; } if (shf_rank == 0 ) computed_shift = TRUE; } ap1 = array_p + a_offs ; rp1 = result_p + r_offs1; for ( k = 0 ; k < ll1 ; k ++ ) { *(i1 *)rp1 = *(i1 *)ap1 ; rp1 += r_stride ; ap1 += a_stride ; } rp1 = result_p + r_offs2 ; lfill = *(i1 *) boundary_p ; for ( k = 0 ; k < ll2 ; k ++ ) { *(i1 *)rp1 = lfill ; rp1 += r_stride ; } array_p += a_bump ; result_p += r_bump ; boundary_p += f_stride ; counter[0] = a_size ; j = 0 ; while ((counter[j] == src_extent[j]) && (j < src_rank)) { array_p += src_offset[j] ; result_p += res_offset[j] ; shift_p += shf_offset[j] ; boundary_p += fill_offset[j] ; counter[j+1]++ ; counter[j] = 0 ; j ++ ; } } } else if (typ_sz == sizeof(i2) && ALIGNED_i2(array_p) && ALIGNED_i2(result_p) && ALIGNED_i2(boundary_p)) { while (counter[src_rank] < src_extent[src_rank] ) { i2 lfill = 0 ; if (!computed_shift) { switch (shf_typ_sz) { case sizeof(i1): shft = * (i1 *)shift_p ; break ; case sizeof(i2): shft 
= * (i2 *)shift_p ; break ; case sizeof(i4): shft = * (i4 *)shift_p ; break ; case sizeof(i8): shft = * (i8 *)shift_p ; break ; } shift_p += shf_stride[0]; if (shft < 0 ) { if (shft < (-(int64_t) src_extent[0])) shft = (-(int64_t) src_extent[0]); ll1 = ((int64_t)src_extent[0]) - abs(shft) ; ll2 = abs(shft) ; r_offs1 = res_stride[0] * ll2 ; r_offs2 = 0 ; a_offs = 0 ; } else { if (shft > (int64_t)src_extent[0]) shft = (int64_t)src_extent[0] ; ll1 = ((int64_t)src_extent[0]) - shft ; ll2 = shft ; r_offs1 = 0 ; r_offs2 = res_stride[0] * ll1 ; a_offs = a_stride * ll2 ; } if (shf_rank == 0 ) computed_shift = TRUE; } ap1 = array_p + a_offs ; rp1 = result_p + r_offs1; for ( k = 0 ; k < ll1 ; k ++ ) { *(i2 *)rp1 = *(i2 *)ap1 ; rp1 += r_stride ; ap1 += a_stride ; } rp1 = result_p + r_offs2 ; lfill = *(i2 *) boundary_p ; for ( k = 0 ; k < ll2 ; k ++ ) { *(i2 *)rp1 = lfill ; rp1 += r_stride ; } array_p += a_bump ; result_p += r_bump ; boundary_p += f_stride ; counter[0] = a_size ; j = 0 ; while ((counter[j] == src_extent[j]) && (j < src_rank)) { array_p += src_offset[j] ; result_p += res_offset[j] ; shift_p += shf_offset[j] ; boundary_p += fill_offset[j] ; counter[j+1]++ ; counter[j] = 0 ; j ++ ; } } } else if (typ_sz == sizeof(r4) && ALIGNED_r4(array_p) && ALIGNED_r4(result_p) && ALIGNED_r4(boundary_p)) { while (counter[src_rank] < src_extent[src_rank] ) { #ifdef KEY /* bug 8062 */ /* Using IEEE FP on non-FP data might change bits during assign */ ui4 lfill = 0 ; #else /* KEY bug 8062 */ r4 lfill = 0 ; #endif /* KEY bug 8062 */ if (!computed_shift) { switch (shf_typ_sz) { case sizeof(i1): shft = * (i1 *)shift_p ; break ; case sizeof(i2): shft = * (i2 *)shift_p ; break ; case sizeof(i4): shft = * (i4 *)shift_p ; break ; case sizeof(i8): shft = * (i8 *)shift_p ; break ; } shift_p += shf_stride[0]; if (shft < 0 ) { if (shft < (-(int64_t) src_extent[0])) shft = (-(int64_t) src_extent[0]); ll1 = ((int64_t)src_extent[0]) - abs(shft) ; ll2 = abs(shft) ; r_offs1 = res_stride[0] * ll2 
; r_offs2 = 0 ; a_offs = 0 ; } else { if (shft > (int64_t)src_extent[0]) shft = (int64_t)src_extent[0] ; ll1 = ((int64_t)src_extent[0]) - shft ; ll2 = shft ; r_offs1 = 0 ; r_offs2 = res_stride[0] * ll1 ; a_offs = a_stride * ll2 ; } if (shf_rank == 0 ) computed_shift = TRUE; } ap1 = array_p + a_offs ; rp1 = result_p + r_offs1; for ( k = 0 ; k < ll1 ; k ++ ) { #ifdef KEY /* bug 8062 */ /* Using IEEE FP on non-FP data might change bits during assign */ *(ui4 *)rp1 = *(ui4 *)ap1 ; #else /* KEY bug 8062 */ *(r4 *)rp1 = *(r4 *)ap1 ; #endif /* KEY bug 8062 */ rp1 += r_stride ; ap1 += a_stride ; } rp1 = result_p + r_offs2 ; #ifdef KEY /* bug 8062 */ /* Using IEEE FP on non-FP data might change bits during assign */ lfill = *(ui4 *) boundary_p ; #else /* KEY bug 8062 */ lfill = *(r4 *) boundary_p ; #endif /* KEY bug 8062 */ for ( k = 0 ; k < ll2 ; k ++ ) { #ifdef KEY /* bug 8062 */ /* Using IEEE FP on non-FP data might change bits during assign */ *(ui4 *)rp1 = lfill ; #else /* KEY bug 8062 */ *(r4 *)rp1 = lfill ; #endif /* KEY bug 8062 */ rp1 += r_stride ; } array_p += a_bump ; result_p += r_bump ; boundary_p += f_stride ; counter[0] = a_size ; j = 0 ; while ((counter[j] == src_extent[j]) && (j < src_rank)) { array_p += src_offset[j] ; result_p += res_offset[j] ; shift_p += shf_offset[j] ; boundary_p += fill_offset[j] ; counter[j+1]++ ; counter[j] = 0 ; j ++ ; } } } else if (typ_sz == sizeof(r8) && ALIGNED_r8(array_p) && ALIGNED_r8(result_p) && ALIGNED_r8(boundary_p)) { while (counter[src_rank] < src_extent[src_rank] ) { #ifdef KEY /* bug 8062 */ /* Using IEEE FP on non-FP data might change bits during assign */ ui8 lfill = 0 ; #else /* KEY bug 8062 */ r8 lfill = 0 ; #endif /* KEY bug 8062 */ if (!computed_shift) { switch (shf_typ_sz) { case sizeof(i1): shft = * (i1 *)shift_p ; break ; case sizeof(i2): shft = * (i2 *)shift_p ; break ; case sizeof(i4): shft = * (i4 *)shift_p ; break ; case sizeof(i8): shft = * (i8 *)shift_p ; break ; } shift_p += shf_stride[0]; if (shft < 0 
) { if (shft < (-(int64_t) src_extent[0])) shft = (-(int64_t) src_extent[0]); ll1 = ((int64_t)src_extent[0]) - abs(shft) ; ll2 = abs(shft) ; r_offs1 = res_stride[0] * ll2 ; r_offs2 = 0 ; a_offs = 0 ; } else { if (shft > (int64_t)src_extent[0]) shft = (int64_t)src_extent[0] ; ll1 = ((int64_t)src_extent[0]) - shft ; ll2 = shft ; r_offs1 = 0 ; r_offs2 = res_stride[0] * ll1 ; a_offs = a_stride * ll2 ; } if (shf_rank == 0 ) computed_shift = TRUE; } ap1 = array_p + a_offs ; rp1 = result_p + r_offs1; for ( k = 0 ; k < ll1 ; k ++ ) { #ifdef KEY /* bug 8062 */ /* Using IEEE FP on non-FP data might change bits during assign */ *(ui8 *)rp1 = *(ui8 *)ap1 ; #else /* KEY bug 8062 */ *(r8 *)rp1 = *(r8 *)ap1 ; #endif /* KEY bug 8062 */ rp1 += r_stride ; ap1 += a_stride ; } rp1 = result_p + r_offs2 ; #ifdef KEY /* bug 8062 */ /* Using IEEE FP on non-FP data might change bits during assign */ lfill = *(ui8 *) boundary_p ; #else /* KEY bug 8062 */ lfill = *(r8 *) boundary_p ; #endif /* KEY bug 8062 */ for ( k = 0 ; k < ll2 ; k ++ ) { #ifdef KEY /* bug 8062 */ /* Using IEEE FP on non-FP data might change bits during assign */ *(ui8 *)rp1 = lfill ; #else /* KEY bug 8062 */ *(r8 *)rp1 = lfill ; #endif /* KEY bug 8062 */ rp1 += r_stride ; } array_p += a_bump ; result_p += r_bump ; boundary_p += f_stride ; counter[0] = a_size ; j = 0 ; while ((counter[j] == src_extent[j]) && (j < src_rank)) { array_p += src_offset[j] ; result_p += res_offset[j] ; shift_p += shf_offset[j] ; boundary_p += fill_offset[j] ; counter[j+1]++ ; counter[j] = 0 ; j ++ ; } } } else if (typ_sz == sizeof(r16) && ALIGNED_r16(array_p) && ALIGNED_r16(result_p) && ALIGNED_r16(boundary_p)) { while (counter[src_rank] < src_extent[src_rank] ) { #ifdef KEY /* Bug 4039 */ ui16 lfill = { 0, 0 } ; #else /* KEY Bug 4039 */ r16 lfill = 0 ; #endif /* KEY Bug 4039 */ if (!computed_shift) { switch (shf_typ_sz) { case sizeof(i1): shft = * (i1 *)shift_p ; break ; case sizeof(i2): shft = * (i2 *)shift_p ; break ; case sizeof(i4): shft = 
* (i4 *)shift_p ; break ; case sizeof(i8): shft = * (i8 *)shift_p ; break ; } shift_p += shf_stride[0]; if (shft < 0 ) { if (shft < (-(int64_t) src_extent[0])) shft = (-(int64_t) src_extent[0]); ll1 = ((int64_t)src_extent[0]) - abs(shft) ; ll2 = abs(shft) ; r_offs1 = res_stride[0] * ll2 ; r_offs2 = 0 ; a_offs = 0 ; } else { if (shft > (int64_t)src_extent[0]) shft = (int64_t)src_extent[0] ; ll1 = ((int64_t)src_extent[0]) - shft ; ll2 = shft ; r_offs1 = 0 ; r_offs2 = res_stride[0] * ll1 ; a_offs = a_stride * ll2 ; } if (shf_rank == 0 ) computed_shift = TRUE; } ap1 = array_p + a_offs ; rp1 = result_p + r_offs1; for ( k = 0 ; k < ll1 ; k ++ ) { #ifdef KEY /* Bug 4039 */ *(ui16 *)rp1 = *(ui16 *)ap1 ; #else /* KEY Bug 4039 */ *(r16 *)rp1 = *(r16 *)ap1 ; #endif /* KEY Bug 4039 */ rp1 += r_stride ; ap1 += a_stride ; } rp1 = result_p + r_offs2 ; #ifdef KEY /* Bug 4039 */ lfill = *(ui16 *) boundary_p ; #else /* KEY Bug 4039 */ lfill = *(r16 *) boundary_p ; #endif /* KEY Bug 4039 */ for ( k = 0 ; k < ll2 ; k ++ ) { #ifdef KEY /* Bug 4039 */ *(ui16 *)rp1 = lfill ; #else /* KEY Bug 4039 */ *(r16 *)rp1 = lfill ; #endif /* KEY Bug 4039 */ rp1 += r_stride ; } array_p += a_bump ; result_p += r_bump ; boundary_p += f_stride ; counter[0] = a_size ; j = 0 ; while ((counter[j] == src_extent[j]) && (j < src_rank)) { array_p += src_offset[j] ; result_p += res_offset[j] ; shift_p += shf_offset[j] ; boundary_p += fill_offset[j] ; counter[j+1]++ ; counter[j] = 0 ; j ++ ; } } } else { while (counter[src_rank] < src_extent[src_rank] ) { char * lfill ; if (!computed_shift) { switch (shf_typ_sz) { case sizeof(i1): shft = * (i1 *)shift_p ; break ; case sizeof(i2): shft = * (i2 *)shift_p ; break ; case sizeof(i4): shft = * (i4 *)shift_p ; break ; case sizeof(i8): shft = * (i8 *)shift_p ; break ; } shift_p += shf_stride[0]; if (shft < 0 ) { if (shft < (-(int64_t) src_extent[0])) shft = (-(int64_t) src_extent[0]); ll1 = ((int64_t)src_extent[0]) - abs(shft) ; ll2 = abs(shft) ; r_offs1 = 
res_stride[0] * ll2 ; r_offs2 = 0 ; a_offs = 0 ; } else { if (shft > (int64_t)src_extent[0]) shft = (int64_t)src_extent[0] ; ll1 = ((int64_t)src_extent[0]) - shft ; ll2 = shft ; r_offs1 = 0 ; r_offs2 = res_stride[0] * ll1 ; a_offs = a_stride * ll2 ; } if (shf_rank == 0 ) computed_shift = TRUE; } ap1 = array_p + a_offs ; rp1 = result_p + r_offs1; for ( k = 0 ; k < ll1 ; k ++ ) { ap = ap1 ; rp = rp1 ; if (typ_sz > BIGDEFAULTSZ) (void) memcpy (rp, ap, typ_sz); else for (j = 0 ; j < typ_sz ; j ++) *rp++ = *ap ++ ; rp1 += r_stride ; ap1 += a_stride ; } rp1 = result_p + r_offs2 ; lfill = boundary_p ; if (boundary != NULL) { for ( k = 0 ; k < ll2 ; k ++ ) { ap2 = lfill ; rp = rp1 ; if (typ_sz > BIGDEFAULTSZ) (void) memcpy (rp, ap2, typ_sz); else for (j = 0 ; j < typ_sz ; j ++) *rp++ = *ap2 ++ ; rp1 += r_stride ; } } else { for ( k = 0 ; k < ll2 ; k ++ ) { rp = rp1 ; for (j = 0 ; j < typ_sz ; j ++) *rp++ = *lfill ; rp1 += r_stride ; } } array_p += a_bump ; result_p += r_bump ; boundary_p += f_stride ; counter[0] = a_size ; j = 0 ; while ((counter[j] == src_extent[j]) && (j < src_rank)) { array_p += src_offset[j] ; result_p += res_offset[j] ; shift_p += shf_offset[j] ; boundary_p += fill_offset[j] ; counter[j+1]++ ; counter[j] = 0 ; j ++ ; } } } }