示例#1
0
void 
_RESHAPE(
DopeVectorType	*result,
DopeVectorType	*array,
DopeVectorType	*shape,
DopeVectorType	*pad,
DopeVectorType	*order)
{
  char  * result_p, * result_b ;
  char  * array_p, * array_b ;
  i4 * shape_p, * shape_b ;
  char * pad_p, * pad_b ;
  i4 * order_p, * order_b ;

  size_t src_extent [MAX_NARY_DIMS] ;
  size_t src_stride [MAX_NARY_DIMS] ;
  size_t src_offset [MAX_NARY_DIMS] ;
  size_t counter[MAX_NARY_DIMS] ;

  size_t res_stride [MAX_NARY_DIMS] ;
  size_t res_extent [MAX_NARY_DIMS] ;
  size_t res_offset [MAX_NARY_DIMS] ;
  size_t res_counter[MAX_NARY_DIMS] ;

  size_t pad_stride [MAX_NARY_DIMS] ;
  size_t pad_extent [MAX_NARY_DIMS] ;
  size_t pad_offset [MAX_NARY_DIMS] ;

  int32_t  l_order[MAX_NARY_DIMS] ;
  int32_t  l_order_chk[MAX_NARY_DIMS] ;
  int32_t  l_shape[MAX_NARY_DIMS] ;

  int32_t j,ii;
  char *rp, *ap ;
  int32_t *gp1  ;
  int32_t pad_rank ;
  int32_t shp_rank ;
  int32_t res_rank ;
  int32_t src_rank = GET_RANK_FROM_DESC(array) - 1;

  size_t typ_sz   = GET_ELEMENT_SZ_FROM_DESC(array);

  size_t a_size,a_stride,r_stride, i,k ;
  size_t l_size,l_stride;
  size_t tot_shape, tot_source;
  char *   l_p ;
  int8_t  zero_szd_shape = FALSE;
  int8_t  zero_szd_order = FALSE;
  int8_t  zero_szd_source = FALSE;
  int8_t  zero_szd_pad = FALSE;
  int8_t  byte_aligned = FALSE;

  int32_t ddim ;

  size_t  num_trues ;
  int32_t local_alloc ;
  size_t  tot_ext ;
  size_t  str_sz  ;

  size_t src_size ;

  size_t  res_sz;
  size_t  xfer_sz;
  size_t  tot_sz;

  tot_source = 1 ;
  for( j = 0 ; j <= src_rank ; j ++  ) {
    src_extent[j]  = GET_EXTENT_FROM_DESC(array,j) ;
    src_stride[j]  = GET_STRIDE_FROM_DESC(array,j) ;
    counter[j] = 0 ;
    zero_szd_source = zero_szd_source || (src_extent[j] == 0) ;
    tot_source *= src_extent[j];
  }

  for ( j = 1 ; j <= src_rank ; j ++  )
    src_offset[j-1] = src_stride[j] - (src_stride [j-1] * (src_extent[j-1]))  ;

  byte_aligned = GET_BYTEALIGNED_FROM_DESC(array) ;

  res_rank = GET_EXTENT_FROM_DESC(shape,0) ;
  if (res_rank == 0)
    ERROR(_LELVL_ABORT, FESHPSZZ);

  l_stride = GET_STRIDE_FROM_DESC(shape,0);
  l_size   = GET_ELEMENT_SZ_FROM_DESC(shape);
  l_p      = GET_ADDRESS_FROM_DESC(shape);

  switch (l_size) {
  case 1:
    for (j = 0 ; j < res_rank ; j++ ) {
      l_shape[j] = * (int8_t *) l_p ;
      l_p += l_stride;
    }
    break;

  case 2:
    for (j = 0 ; j < res_rank ; j++ ) {
      l_shape[j] = * (int16_t *) l_p ;
      l_p += l_stride;
    }
    break;

  case 4:
    for (j = 0 ; j < res_rank ; j++ ) {
      l_shape[j] = * (int32_t *) l_p ;
      l_p += l_stride;
    }
    break;

  case 8:
    for (j = 0 ; j < res_rank ; j++ ) {
      l_shape[j] = * (int64_t *) l_p ;
      l_p += l_stride;
    }
    break;
  }

  tot_shape = 1;
  for (i = 0; i < res_rank; i++) {
    if (l_shape[i] < 0)
      ERROR (_LELVL_ABORT, FERSHNEG);
    zero_szd_shape = zero_szd_shape || (l_shape[i] == 0);
    tot_shape *= l_shape[i];

  }

  if (order == NULL) {
    for (j = 0 ; j < res_rank ; j++ )
      l_order[j] = j + 1;

  } else {

    l_stride = GET_STRIDE_FROM_DESC(order,0);
    l_size   = GET_ELEMENT_SZ_FROM_DESC(order);
    l_p      = GET_ADDRESS_FROM_DESC(order);

    switch (l_size) {
    case 1:
      for (j = 0 ; j < res_rank ; j++ ) {
	l_order_chk[j] = FALSE;
	l_order[j] = * (int8_t *) l_p ;
	l_p += l_stride;
      }
      break;

    case 2:
      for (j = 0 ; j < res_rank ; j++ ) {
	l_order_chk[j] = FALSE;
	l_order[j] = * (int16_t *) l_p ;
	l_p += l_stride;
      }
      break;

    case 4:
      for (j = 0 ; j < res_rank ; j++ ) {
	l_order_chk[j] = FALSE;
	l_order[j] = * (int32_t *) l_p ;
	l_p += l_stride;
      }
      break;

    case 8:
      for (j = 0 ; j < res_rank ; j++ ) {
	l_order_chk[j] = FALSE;
	l_order[j] = * (int64_t *) l_p ;
	l_p += l_stride;
      }
      break;
    }

    for (i = 0; i < res_rank; i++) {
      if (l_order[i] <= 0 || l_order[i] > res_rank)
	ERROR(_LELVL_ABORT, FEBDORDR);
      l_order_chk[l_order[i]-1] = TRUE;
      zero_szd_order = zero_szd_order || (l_order[i] == 0) ;
    }
    for (i = 0; i < res_rank; i++) {
      if (!l_order_chk[i])
	ERROR(_LELVL_ABORT, FEBDORDR);
    }
  }

  if (pad != NULL ) {
    pad_p = GET_ADDRESS_FROM_DESC(pad);
    pad_rank = GET_RANK_FROM_DESC(pad) - 1;
    for ( j = 0 ; j <= pad_rank ; j ++  ) {
      pad_extent[j]  = GET_EXTENT_FROM_DESC(pad,j) ;
      pad_stride[j]  = GET_STRIDE_FROM_DESC(pad,j) ;
      zero_szd_pad   = zero_szd_pad || (pad_extent[j] == 0) ;

    }
    for ( j = 1 ; j <= pad_rank ; j ++  )
      pad_offset[j-1] = pad_stride[j] - (pad_stride [j-1] * (pad_extent[j-1]))  ;

  } else if (tot_shape > tot_source) {
    ERROR(_LELVL_ABORT, FERSHNPD);
  }

  if (!GET_ASSOCIATED_FROM_DESC(result)) {

    size_t  nbytes  ;
    char    *p      ;

    SET_ADDRESS_IN_DESC(result,NULL);
    SET_ORIG_BS_IN_DESC(result,NULL) ;
    SET_ORIG_SZ_IN_DESC(result,0) ;
    SET_RANK_IN_DESC(result,res_rank) ;

    p = NULL ;
    tot_ext = 1 ;
    nbytes  = typ_sz ;
    str_sz  = MK_STRIDE(byte_aligned,typ_sz);

    for ( i = 0 ; i < res_rank ; i ++) {
      SET_LBOUND_IN_DESC(result,i,1);
      SET_EXTENT_IN_DESC(result,i,l_shape[i]);
      SET_STRMULT_IN_DESC(result,i,tot_ext * str_sz );
      tot_ext *= l_shape[i];
      nbytes  *= l_shape[i];
    }

    if (nbytes > 0 && !zero_szd_order) {
      p = (void *) malloc (nbytes);
      if (p == NULL)
	ERROR(_LELVL_ABORT, FENOMEMY);

      SET_ADDRESS_IN_DESC(result,p);
    }

    SET_ASSOCIATED_IN_DESC(result);
    SET_CONTIG_IN_DESC(result);
    SET_ALEN_IN_DESC(result,GET_ALEN_FROM_DESC(array));
    if (GET_DV_ASCII_FROM_DESC(array)) {
      SET_CHARPTR_IN_DESC(result,p,typ_sz);
    }
    SET_ORIG_BS_IN_DESC(result,p) ;
    SET_ORIG_SZ_IN_DESC(result,nbytes*8) ;
  }

  if (zero_szd_shape || zero_szd_order)
    return ;

  if (zero_szd_source && (pad == NULL || zero_szd_pad))
    ERROR(_LELVL_ABORT, FERSHNPD);

  for ( j = 0 , gp1 = l_order ; j < res_rank ; j ++  ) {
    if (gp1 == NULL)
      ii = j ;
    else 
      ii = (*gp1++)-1  ;

    res_stride[j]  = GET_STRIDE_FROM_DESC(result,ii) ;
    res_extent[j]  = GET_EXTENT_FROM_DESC(result,ii) ;
    res_counter[j] = 0 ;
  }

  for ( j = 1 ; j < res_rank ; j ++  )
    res_offset[j-1] = res_stride[j] - (res_stride [j-1] * (res_extent[j-1]))  ;
  res_rank -- ;
  if (zero_szd_source)
    if (pad != NULL)
      for (i = 0 ; i <= src_rank ; i ++) src_extent[i] = 0;
    else
      return ;

  a_size   = src_extent[0] ;
  a_stride = src_stride[0] ;
  r_stride = res_stride[0] ;
  array_p = GET_ADDRESS_FROM_DESC(array);
  result_p = GET_ADDRESS_FROM_DESC(result);

  if (typ_sz == sizeof(i1) && ALIGNED_i1(array_p) &&  ALIGNED_i1(result_p) &&  ((pad_p == NULL) || ALIGNED_i1(pad_p))) {

    for (;;) {
      while (counter[src_rank] < src_extent[src_rank] ) {
	for ( i = 0 ; i < a_size ; i ++ ) {
	  *(i1 *)result_p = *(i1 *)array_p ;
	  array_p += a_stride ;
	  result_p += r_stride ;

	  j = 0 ;
	  res_counter[0] ++ ;

	  while (res_counter[j] == res_extent[j]) {
	    if (j == res_rank ) return ;
	    result_p += res_offset[j] ;
	    res_counter[j+1]++ ;
	    res_counter[j] = 0 ;
	    j ++ ;
	  }
	}
	counter[0] = a_size  ;
	j = 0 ;
	while ((counter[j] == src_extent[j]) && (j < src_rank)) {
	  array_p += src_offset[j] ;
	  counter[j+1]++ ;
	  counter[j] = 0 ;
	  j ++ ;
	}

      }
      if (pad != NULL)  {

	src_rank = pad_rank ;

	for ( j = 0 ; j <= src_rank ; j ++  ) {
	  src_extent [j] = pad_extent[j] ;
	  src_stride [j] = pad_stride[j] ;
	  counter[j] =  0 ;
	  src_offset [j] = pad_offset[j] ;
	}
	array_p = pad_p ;
	a_size    = src_extent [0]  ;
	a_stride  = src_stride [0]  ;
      }
    }
  } else if (typ_sz == sizeof(i2) && ALIGNED_i2(array_p) &&  ALIGNED_i2(result_p) && ((pad_p == NULL) || ALIGNED_i2(pad_p))) {

    for (;;) {
      while (counter[src_rank] < src_extent[src_rank] ) {
	for ( i = 0 ; i < a_size ; i ++ ) {
	  *(i2 *)result_p = *(i2 *)array_p ;
	  array_p += a_stride ;
	  result_p += r_stride ;

	  j = 0 ;
	  res_counter[0] ++ ;

	  while (res_counter[j] == res_extent[j]) {
	    if (j == res_rank ) return ;
	    result_p += res_offset[j] ;
	    res_counter[j+1]++ ;
	    res_counter[j] = 0 ;
	    j ++ ;
	  }
	}
	counter[0] = a_size  ;
	j = 0 ;
	while ((counter[j] == src_extent[j]) && (j < src_rank)) {
	  array_p += src_offset[j] ;
	  counter[j+1]++ ;
	  counter[j] = 0 ;
	  j ++ ;
	}

      }
      if (pad != NULL)  {

	src_rank = pad_rank ;

	for ( j = 0 ; j <= src_rank ; j ++  ) {
	  src_extent [j] = pad_extent[j] ;
	  src_stride [j] = pad_stride[j] ;
	  counter[j] =  0 ;
	  src_offset [j] = pad_offset[j] ;
	}
	array_p = pad_p ;
	a_size    = src_extent [0]  ;
	a_stride  = src_stride [0]  ;
      }
    }
  } else if (typ_sz == sizeof(r4) && ALIGNED_r4(array_p) &&  ALIGNED_r4(result_p) && ((pad_p == NULL) || ALIGNED_r4(pad_p))) {

    for (;;) {
      while (counter[src_rank] < src_extent[src_rank] ) {
	for ( i = 0 ; i < a_size ; i ++ ) {
	  *(r4 *)result_p = *(r4 *)array_p ;
	  array_p += a_stride ;
	  result_p += r_stride ;

	  j = 0 ;
	  res_counter[0] ++ ;

	  while (res_counter[j] == res_extent[j]) {
	    if (j == res_rank ) return ;
	    result_p += res_offset[j] ;
	    res_counter[j+1]++ ;
	    res_counter[j] = 0 ;
	    j ++ ;
	  }
	}
	counter[0] = a_size  ;
	j = 0 ;
	while ((counter[j] == src_extent[j]) && (j < src_rank)) {
	  array_p += src_offset[j] ;
	  counter[j+1]++ ;
	  counter[j] = 0 ;
	  j ++ ;
	}

      }
      if (pad != NULL)  {

	src_rank = pad_rank ;

	for ( j = 0 ; j <= src_rank ; j ++  ) {
	  src_extent [j] = pad_extent[j] ;
	  src_stride [j] = pad_stride[j] ;
	  counter[j] =  0 ;
	  src_offset [j] = pad_offset[j] ;
	}
	array_p = pad_p ;
	a_size    = src_extent [0]  ;
	a_stride  = src_stride [0]  ;
      }
    }
  } else if (typ_sz == sizeof(r8) && ALIGNED_r8(array_p) &&  ALIGNED_r8(result_p) && ((pad_p == NULL) || ALIGNED_r8(pad_p))) {

    for (;;) {
      while (counter[src_rank] < src_extent[src_rank] ) {
	for ( i = 0 ; i < a_size ; i ++ ) {
	  *(r8 *)result_p = *(r8 *)array_p ;
	  array_p += a_stride ;
	  result_p += r_stride ;

	  j = 0 ;
	  res_counter[0] ++ ;

	  while (res_counter[j] == res_extent[j]) {
	    if (j == res_rank ) return ;
	    result_p += res_offset[j] ;
	    res_counter[j+1]++ ;
	    res_counter[j] = 0 ;
	    j ++ ;
	  }
	}
	counter[0] = a_size  ;
	j = 0 ;
	while ((counter[j] == src_extent[j]) && (j < src_rank)) {
	  array_p += src_offset[j] ;
	  counter[j+1]++ ;
	  counter[j] = 0 ;
	  j ++ ;
	}

      }
      if (pad != NULL)  {

	src_rank = pad_rank ;

	for ( j = 0 ; j <= src_rank ; j ++  ) {
	  src_extent [j] = pad_extent[j] ;
	  src_stride [j] = pad_stride[j] ;
	  counter[j] =  0 ;
	  src_offset [j] = pad_offset[j] ;
	}
	array_p = pad_p ;
	a_size    = src_extent [0]  ;
	a_stride  = src_stride [0]  ;
      }
    }
  } else if (typ_sz == sizeof(r16) && ALIGNED_r16(array_p) &&  ALIGNED_r16(result_p) && ((pad_p == NULL) || ALIGNED_r16(pad_p))) {

    for (;;) {
      while (counter[src_rank] < src_extent[src_rank] ) {
	for ( i = 0 ; i < a_size ; i ++ ) {
	  *(r16 *)result_p = *(r16 *)array_p ;
	  array_p += a_stride ;
	  result_p += r_stride ;

	  j = 0 ;
	  res_counter[0] ++ ;

	  while (res_counter[j] == res_extent[j]) {
	    if (j == res_rank ) return ;
	    result_p += res_offset[j] ;
	    res_counter[j+1]++ ;
	    res_counter[j] = 0 ;
	    j ++ ;
	  }
	}
	counter[0] = a_size  ;
	j = 0 ;
	while ((counter[j] == src_extent[j]) && (j < src_rank)) {
	  array_p += src_offset[j] ;
	  counter[j+1]++ ;
	  counter[j] = 0 ;
	  j ++ ;
	}

      }
      if (pad != NULL)  {

	src_rank = pad_rank ;

	for ( j = 0 ; j <= src_rank ; j ++  ) {
	  src_extent [j] = pad_extent[j] ;
	  src_stride [j] = pad_stride[j] ;
	  counter[j] =  0 ;
	  src_offset [j] = pad_offset[j] ;
	}
	array_p = pad_p ;
	a_size    = src_extent [0]  ;
	a_stride  = src_stride [0]  ;
      }
    }
  } else {
    for (;;) {
      while (counter[src_rank] < src_extent[src_rank] ) {
	for ( i = 0 ; i < a_size ; i ++ ) {
	  ap = array_p ;
	  rp = result_p ;
	  if (typ_sz > BIGDEFAULTSZ)
	    (void) memcpy (rp, ap, typ_sz);
	  else
	    for (j = 0 ; j < typ_sz ; j ++)  *rp++ = *ap ++ ;
	  array_p += a_stride ;
	  result_p += r_stride ;

	  j = 0 ;
	  res_counter[0] ++ ;

	  while (res_counter[j] == res_extent[j]) {
	    if (j == res_rank ) return ;
	    result_p += res_offset[j] ;
	    res_counter[j+1]++ ;
	    res_counter[j] = 0 ;
	    j ++ ;
	  }
	}
	counter[0] = a_size  ;
	j = 0 ;
	while ((counter[j] == src_extent[j]) && (j < src_rank)) {
	  array_p += src_offset[j] ;
	  counter[j+1]++ ;
	  counter[j] = 0 ;
	  j ++ ;
	}

      }
      if (pad != NULL)  {

	src_rank = pad_rank ;

	for ( j = 0 ; j <= src_rank ; j ++  ) {
	  src_extent [j] = pad_extent[j] ;
	  src_stride [j] = pad_stride[j] ;
	  counter[j] =  0 ;
	  src_offset [j] = pad_offset[j] ;
	}
	array_p = pad_p ;
	a_size    = src_extent [0]  ;
	a_stride  = src_stride [0]  ;
      }
    }
  }
}
示例#2
0
文件: cshift_gen.c 项目: xyuan/Path64
void 
_CSHIFT(
DopeVectorType	*result,
DopeVectorType	*array,
DopeVectorType	*shift,
i4 *dim)
{
  char  * result_p, * result_b ;
  char  * array_p, * array_b ;
  char * shift_p, * shift_b ;

  size_t src_extent [MAX_NARY_DIMS] ;
  size_t src_stride [MAX_NARY_DIMS] ;
  size_t src_offset [MAX_NARY_DIMS] ;
  size_t counter[MAX_NARY_DIMS] ;

  size_t res_stride [MAX_NARY_DIMS] ;
  size_t res_extent [MAX_NARY_DIMS] ;
  size_t res_offset [MAX_NARY_DIMS] ;

  int32_t j,ii;
  char *rp, *ap ;
  int32_t res_rank ;
  int32_t shf_rank ;
  int32_t src_rank = GET_RANK_FROM_DESC(array) - 1;

  size_t typ_sz   = GET_ELEMENT_SZ_FROM_DESC(array);

  size_t a_size,a_stride,r_stride, i,k ;
  int8_t  zero_szd_source = FALSE;
  int8_t  byte_aligned = FALSE;

  size_t  a_offs,a_bump,r_bump ;
  size_t ll1,ll2;
  int64_t shft,shf_typ_sz  ;
  char  *rp1,  *ap1, *ap2  ;
  int32_t ddim ;

  size_t shf_stride [MAX_NARY_DIMS] ;
  size_t shf_offset [MAX_NARY_DIMS] ;
  size_t  num_trues ;
  int32_t local_alloc ;
  size_t  tot_ext ;
  size_t  str_sz  ;

  size_t src_size ;

  size_t  res_sz;
  size_t  xfer_sz;
  size_t  tot_sz;

  int8_t computed_shift = FALSE ;
  ddim = (*dim) - 1 ;

  if ((ddim > src_rank) || (ddim < 0))
    ERROR(_LELVL_ABORT,FESCIDIM);

  src_extent[0] = GET_EXTENT_FROM_DESC(array,ddim) ;
  src_stride[0] = GET_STRIDE_FROM_DESC(array,ddim) ;
  byte_aligned  = GET_BYTEALIGNED_FROM_DESC(array) ;

  for ( j = 0, k = 1 ; j <= src_rank ; j ++  ) {
    if (j != ddim ) {
      src_extent[k] = GET_EXTENT_FROM_DESC(array,j) ;
      src_stride[k] = GET_STRIDE_FROM_DESC(array,j) ;
      src_offset[k-1] = src_stride[k] - (src_stride [k-1] * (src_extent[k-1]))  ;
      k++ ;
    }
    counter[j] = 0 ;
    shf_offset[j] = 0 ;
    zero_szd_source = zero_szd_source || (src_extent[j] == 0) ;
  }

  if (!GET_ASSOCIATED_FROM_DESC(result)) {

    size_t  nbytes  ;
    size_t  ext  ;
    char    *p      ;

    SET_ADDRESS_IN_DESC(result,NULL);
    SET_ORIG_BS_IN_DESC(result,NULL) ;
    SET_ORIG_SZ_IN_DESC(result,0) ;

    p = NULL ;
    tot_ext = 1 ;
    nbytes  = typ_sz ;
    str_sz  = MK_STRIDE(byte_aligned,typ_sz);

    for ( i = 0 ; i <= src_rank ; i ++) {
      ext = GET_EXTENT_FROM_DESC(array,i) ;
      SET_LBOUND_IN_DESC(result,i,1);
      SET_EXTENT_IN_DESC(result,i,ext);
      SET_STRMULT_IN_DESC(result,i,tot_ext * str_sz );
      tot_ext *= ext;
      nbytes  *= ext;
    }

    if (nbytes > 0) {
      p = (void *) malloc (nbytes);
      if (p == NULL)
	ERROR(_LELVL_ABORT, FENOMEMY);

      SET_ADDRESS_IN_DESC(result,p);
    }

    SET_ASSOCIATED_IN_DESC(result);
    SET_CONTIG_IN_DESC(result);
    if (GET_DV_ASCII_FROM_DESC(array)) {
      SET_CHARPTR_IN_DESC(result,p,typ_sz);
    }
    SET_ORIG_BS_IN_DESC(result,p) ;
    SET_ORIG_SZ_IN_DESC(result,nbytes * 8) ;
  }

  res_stride[0] = GET_STRIDE_FROM_DESC(result,ddim) ;

  for ( j = 0, k = 1  ; j <= src_rank ; j ++  ) {
    if (j != ddim ) {
      res_stride[k] = GET_STRIDE_FROM_DESC(result,j) ;
      res_offset[k-1] = res_stride[k] - (res_stride [k-1] * (src_extent[k-1])) ;
      k++ ;
    }
  }

  shf_typ_sz = GET_ELEMENT_SZ_FROM_DESC(shift);
  shf_rank   = GET_RANK_FROM_DESC(shift);
  shift_p    = GET_ADDRESS_FROM_DESC(shift);

  shf_stride[0] = 0  ;
  for ( j = 0 ; j < shf_rank ; j ++  ) {
    shf_stride[j] = GET_STRIDE_FROM_DESC(shift,j) ;
  }

  for ( j = 1 ; j < shf_rank ; j ++  ) {
    shf_offset[j] = shf_stride[j] - (shf_stride [j-1] * (src_extent[j])) ;
  }

  a_bump = src_extent[0] * src_stride[0] ;
  r_bump = src_extent[0] * res_stride[0] ;

  if (zero_szd_source)
    return ;

  a_size   = src_extent[0] ;
  a_stride = src_stride[0] ;
  r_stride = res_stride[0] ;
  array_p = GET_ADDRESS_FROM_DESC(array);
  result_p = GET_ADDRESS_FROM_DESC(result);

  if (typ_sz == sizeof(i1) && ALIGNED_i1(array_p) &&  ALIGNED_i1(result_p)) {

    while (counter[src_rank] < src_extent[src_rank] ) {
      if (!computed_shift) {
	switch (shf_typ_sz) {
	case sizeof(i1) : 
	  shft = * (i1 *)shift_p ; 
	  break ;
	case sizeof(i2) : 
	  shft = * (i2 *)shift_p ; 
	  break ;
	case sizeof(i4) : 
	  shft = * (i4 *)shift_p ; 
	  break ;
	case sizeof(i8) : 
	  shft = * (i8 *)shift_p ; 
	  break ;
	}

	shft = shft % (int64_t)src_extent[0];
	if (shft < 0 ) {
	  ll1 = abs(shft) ;
	  ll2 = (int64_t)src_extent[0] - abs(shft)  ;

	} else {
	  ll1 = (int64_t)src_extent[0] -  shft ;
	  ll2 = shft ;
	}
	a_offs   = a_stride * ll2 ;
	shift_p  += shf_stride[0] ;

	if (shf_rank == 0)
	  computed_shift = TRUE;
      }

      ap1 = array_p + a_offs ;

      for ( k = 0 ; k < ll1 ; k ++ )  {
	*(i1 *)result_p = *(i1 *)ap1 ;
	result_p += r_stride ;
	ap1 += a_stride ;
      }

      ap2 = array_p ;

      for ( k = 0 ; k < ll2 ; k ++ ) {
	*(i1 *)result_p = *(i1 *)ap2 ;
	result_p += r_stride ;

	ap2 += a_stride ;
      }
      array_p += a_bump ;

      counter[0] = a_size  ;
      j = 0 ;
      while ((counter[j] == src_extent[j]) && (j < src_rank)) {
	array_p += src_offset[j] ;
	result_p += res_offset[j] ;
	shift_p  += shf_offset[j] ;
	counter[j+1]++ ;
	counter[j] = 0 ;
	j ++ ;
      }

    }
  } else if (typ_sz == sizeof(i2) && ALIGNED_i2(array_p) &&  ALIGNED_i2(result_p) ) {

    while (counter[src_rank] < src_extent[src_rank] ) {
      if (!computed_shift) {
	switch (shf_typ_sz) {
	case sizeof(i1) : 
	  shft = * (i1 *)shift_p ; 
	  break ;
	case sizeof(i2) : 
	  shft = * (i2 *)shift_p ; 
	  break ;
	case sizeof(i4) : 
	  shft = * (i4 *)shift_p ; 
	  break ;
	case sizeof(i8) : 
	  shft = * (i8 *)shift_p ; 
	  break ;
	}

	shft = shft % (int64_t)src_extent[0];
	if (shft < 0 ) {
	  ll1 = abs(shft) ;
	  ll2 = (int64_t)src_extent[0] - abs(shft)  ;

	} else {
	  ll1 = (int64_t)src_extent[0] -  shft ;
	  ll2 = shft ;
	}
	a_offs   = a_stride * ll2 ;
	shift_p  += shf_stride[0] ;

	if (shf_rank == 0)
	  computed_shift = TRUE;
      }

      ap1 = array_p + a_offs ;

      for ( k = 0 ; k < ll1 ; k ++ )  {
	*(i2 *)result_p = *(i2 *)ap1 ;
	result_p += r_stride ;
	ap1 += a_stride ;
      }

      ap2 = array_p ;

      for ( k = 0 ; k < ll2 ; k ++ ) {
	*(i2 *)result_p = *(i2 *)ap2 ;
	result_p += r_stride ;

	ap2 += a_stride ;
      }
      array_p += a_bump ;

      counter[0] = a_size  ;
      j = 0 ;
      while ((counter[j] == src_extent[j]) && (j < src_rank)) {
	array_p += src_offset[j] ;
	result_p += res_offset[j] ;
	shift_p  += shf_offset[j] ;
	counter[j+1]++ ;
	counter[j] = 0 ;
	j ++ ;
      }

    }
  } else if (typ_sz == sizeof(r4) && ALIGNED_r4(array_p) &&  ALIGNED_r4(result_p) ) {

    while (counter[src_rank] < src_extent[src_rank] ) {
      if (!computed_shift) {
	switch (shf_typ_sz) {
	case sizeof(i1) : 
	  shft = * (i1 *)shift_p ; 
	  break ;
	case sizeof(i2) : 
	  shft = * (i2 *)shift_p ; 
	  break ;
	case sizeof(i4) : 
	  shft = * (i4 *)shift_p ; 
	  break ;
	case sizeof(i8) : 
	  shft = * (i8 *)shift_p ; 
	  break ;
	}

	shft = shft % (int64_t)src_extent[0];
	if (shft < 0 ) {
	  ll1 = abs(shft) ;
	  ll2 = (int64_t)src_extent[0] - abs(shft)  ;

	} else {
	  ll1 = (int64_t)src_extent[0] -  shft ;
	  ll2 = shft ;
	}
	a_offs   = a_stride * ll2 ;
	shift_p  += shf_stride[0] ;

	if (shf_rank == 0)
	  computed_shift = TRUE;
      }

      ap1 = array_p + a_offs ;

      for ( k = 0 ; k < ll1 ; k ++ )  {
	*(r4 *)result_p = *(r4 *)ap1 ;
	result_p += r_stride ;
	ap1 += a_stride ;
      }

      ap2 = array_p ;

      for ( k = 0 ; k < ll2 ; k ++ ) {
	*(r4 *)result_p = *(r4 *)ap2 ;
	result_p += r_stride ;

	ap2 += a_stride ;
      }
      array_p += a_bump ;

      counter[0] = a_size  ;
      j = 0 ;
      while ((counter[j] == src_extent[j]) && (j < src_rank)) {
	array_p += src_offset[j] ;
	result_p += res_offset[j] ;
	shift_p  += shf_offset[j] ;
	counter[j+1]++ ;
	counter[j] = 0 ;
	j ++ ;
      }

    }
  } else if (typ_sz == sizeof(r8) && ALIGNED_r8(array_p) &&  ALIGNED_r8(result_p) ) {

    while (counter[src_rank] < src_extent[src_rank] ) {
      if (!computed_shift) {
	switch (shf_typ_sz) {
	case sizeof(i1) : 
	  shft = * (i1 *)shift_p ; 
	  break ;
	case sizeof(i2) : 
	  shft = * (i2 *)shift_p ; 
	  break ;
	case sizeof(i4) : 
	  shft = * (i4 *)shift_p ; 
	  break ;
	case sizeof(i8) : 
	  shft = * (i8 *)shift_p ; 
	  break ;
	}

	shft = shft % (int64_t)src_extent[0];
	if (shft < 0 ) {
	  ll1 = abs(shft) ;
	  ll2 = (int64_t)src_extent[0] - abs(shft)  ;

	} else {
	  ll1 = (int64_t)src_extent[0] -  shft ;
	  ll2 = shft ;
	}
	a_offs   = a_stride * ll2 ;
	shift_p  += shf_stride[0] ;

	if (shf_rank == 0)
	  computed_shift = TRUE;
      }

      ap1 = array_p + a_offs ;

      for ( k = 0 ; k < ll1 ; k ++ )  {
	*(r8 *)result_p = *(r8 *)ap1 ;
	result_p += r_stride ;
	ap1 += a_stride ;
      }

      ap2 = array_p ;

      for ( k = 0 ; k < ll2 ; k ++ ) {
	*(r8 *)result_p = *(r8 *)ap2 ;
	result_p += r_stride ;

	ap2 += a_stride ;
      }
      array_p += a_bump ;

      counter[0] = a_size  ;
      j = 0 ;
      while ((counter[j] == src_extent[j]) && (j < src_rank)) {
	array_p += src_offset[j] ;
	result_p += res_offset[j] ;
	shift_p  += shf_offset[j] ;
	counter[j+1]++ ;
	counter[j] = 0 ;
	j ++ ;
      }

    }
  } else if (typ_sz == sizeof(r16) && ALIGNED_r16(array_p) &&  ALIGNED_r16(result_p) ) {

    while (counter[src_rank] < src_extent[src_rank] ) {
      if (!computed_shift) {
	switch (shf_typ_sz) {
	case sizeof(i1) : 
	  shft = * (i1 *)shift_p ; 
	  break ;
	case sizeof(i2) : 
	  shft = * (i2 *)shift_p ; 
	  break ;
	case sizeof(i4) : 
	  shft = * (i4 *)shift_p ; 
	  break ;
	case sizeof(i8) : 
	  shft = * (i8 *)shift_p ; 
	  break ;
	}

	shft = shft % (int64_t)src_extent[0];
	if (shft < 0 ) {
	  ll1 = abs(shft) ;
	  ll2 = (int64_t)src_extent[0] - abs(shft)  ;

	} else {
	  ll1 = (int64_t)src_extent[0] -  shft ;
	  ll2 = shft ;
	}
	a_offs   = a_stride * ll2 ;
	shift_p  += shf_stride[0] ;

	if (shf_rank == 0)
	  computed_shift = TRUE;
      }

      ap1 = array_p + a_offs ;

      for ( k = 0 ; k < ll1 ; k ++ )  {
	*(r16 *)result_p = *(r16 *)ap1 ;
	result_p += r_stride ;
	ap1 += a_stride ;
      }

      ap2 = array_p ;

      for ( k = 0 ; k < ll2 ; k ++ ) {
	*(r16 *)result_p = *(r16 *)ap2 ;
	result_p += r_stride ;

	ap2 += a_stride ;
      }
      array_p += a_bump ;

      counter[0] = a_size  ;
      j = 0 ;
      while ((counter[j] == src_extent[j]) && (j < src_rank)) {
	array_p += src_offset[j] ;
	result_p += res_offset[j] ;
	shift_p  += shf_offset[j] ;
	counter[j+1]++ ;
	counter[j] = 0 ;
	j ++ ;
      }

    }
  } else {
    while (counter[src_rank] < src_extent[src_rank] ) {
      if (!computed_shift) {
	switch (shf_typ_sz) {
	case sizeof(i1) : 
	  shft = * (i1 *)shift_p ; 
	  break ;
	case sizeof(i2) : 
	  shft = * (i2 *)shift_p ; 
	  break ;
	case sizeof(i4) : 
	  shft = * (i4 *)shift_p ; 
	  break ;
	case sizeof(i8) : 
	  shft = * (i8 *)shift_p ; 
	  break ;
	}

	shft = shft % (int64_t)src_extent[0];
	if (shft < 0 ) {
	  ll1 = abs(shft) ;
	  ll2 = (int64_t)src_extent[0] - abs(shft)  ;

	} else {
	  ll1 = (int64_t)src_extent[0] -  shft ;
	  ll2 = shft ;
	}
	a_offs   = a_stride * ll2 ;
	shift_p  += shf_stride[0] ;

	if (shf_rank == 0)
	  computed_shift = TRUE;
      }

      ap1 = array_p + a_offs ;

      for ( k = 0 ; k < ll1 ; k ++ )  {
	rp = result_p ;
	ap = ap1 ;
	if (typ_sz > BIGDEFAULTSZ)
	  (void) memcpy (rp, ap, typ_sz);
	else
	  for (j = 0 ; j < typ_sz ; j ++)  *rp++ = *ap ++ ;
	result_p += r_stride ;
	ap1 += a_stride ;
      }

      ap2 = array_p ;

      for ( k = 0 ; k < ll2 ; k ++ ) {
	rp = result_p ;
	ap = ap2 ;
	if (typ_sz > BIGDEFAULTSZ)
	  (void) memcpy (rp, ap, typ_sz);
	else
	  for (j = 0 ; j < typ_sz ; j ++)  *rp++ = *ap ++ ;
	result_p += r_stride ;

	ap2 += a_stride ;
      }
      array_p += a_bump ;

      counter[0] = a_size  ;
      j = 0 ;
      while ((counter[j] == src_extent[j]) && (j < src_rank)) {
	array_p += src_offset[j] ;
	result_p += res_offset[j] ;
	shift_p  += shf_offset[j] ;
	counter[j+1]++ ;
	counter[j] = 0 ;
	j ++ ;
      }

    }
  }
}
示例#3
0
/*
 * Copy array section "array" to contiguous array "result". Modeled after
 * "pack".
 */
void 
_Copyin(
void	*result,
DopeVectorType	*array)
{
  char *result_p = (char *) result;

  size_t src_extent[MAX_NARY_DIMS];
  size_t src_stride[MAX_NARY_DIMS];
  size_t src_offset[MAX_NARY_DIMS];
  size_t counter[MAX_NARY_DIMS];

  size_t i;
  int32_t j;
  int32_t src_rank = GET_RANK_FROM_DESC(array) - 1;

  size_t typ_sz = GET_ELEMENT_SZ_FROM_DESC(array);

  int8_t  zero_szd_source = FALSE;

  size_t src_size = 1;

  for ( j = 0; j <= src_rank ; j ++  ) {
    src_extent[j] = GET_EXTENT_FROM_DESC(array,j);
    src_stride[j] = GET_STRIDE_FROM_DESC(array,j);
    src_size *= src_extent[j];
    counter[j] = 0;
    zero_szd_source = zero_szd_source || (src_extent[j] == 0);
  }

  for ( j = 1; j <= src_rank ; j ++  )
    src_offset[j-1] = src_stride[j] - (src_stride [j-1] * (src_extent[j-1]));

  int8_t byte_aligned  = GET_BYTEALIGNED_FROM_DESC(array);
  size_t tot_ext       = src_size;

  if (zero_szd_source)
    return;

  size_t a_size   = src_extent[0];
  size_t a_stride = src_stride[0];
  size_t r_stride = typ_sz;
  char *array_p = GET_ADDRESS_FROM_DESC(array);

  if (typ_sz == sizeof(i1) && ALIGNED_i1(array_p) &&  ALIGNED_i1(result_p)) {

    while (counter[src_rank] < src_extent[src_rank] ) {
      for ( i = 0; i < a_size ; i ++ ) {
	*(i1 *)result_p = *(i1 *)array_p;
	result_p += r_stride;
	array_p += a_stride;
      }

      counter[0] = a_size;
      j = 0;
      while ((counter[j] == src_extent[j]) && (j < src_rank)) {
	array_p += src_offset[j];
	counter[j+1]++;
	counter[j] = 0;
	j ++;
      }
    }

  } else if (typ_sz == sizeof(i2) && ALIGNED_i2(array_p) &&  ALIGNED_i2(result_p) ) {

    while (counter[src_rank] < src_extent[src_rank] ) {
      for ( i = 0; i < a_size ; i ++ ) {
	*(i2 *)result_p = *(i2 *)array_p;
	result_p += r_stride;
	array_p += a_stride;
      }

      counter[0] = a_size;
      j = 0;
      while ((counter[j] == src_extent[j]) && (j < src_rank)) {
	array_p += src_offset[j];
	counter[j+1]++;
	counter[j] = 0;
	j ++;
      }
    }

  } else if (typ_sz == sizeof(r4) && ALIGNED_r4(array_p) &&  ALIGNED_r4(result_p) ) {

    while (counter[src_rank] < src_extent[src_rank] ) {
      for ( i = 0; i < a_size ; i ++ ) {
	*(ui4 *)result_p = *(ui4 *)array_p;
	result_p += r_stride;
	array_p += a_stride;
      }

      counter[0] = a_size;
      j = 0;
      while ((counter[j] == src_extent[j]) && (j < src_rank)) {
	array_p += src_offset[j];
	counter[j+1]++;
	counter[j] = 0;
	j ++;
      }
    }

  } else if (typ_sz == sizeof(r8) && ALIGNED_r8(array_p) &&  ALIGNED_r8(result_p) ) {

    while (counter[src_rank] < src_extent[src_rank] ) {
      for ( i = 0; i < a_size ; i ++ ) {
	*(ui8 *)result_p = *(ui8 *)array_p;
	result_p += r_stride;
	array_p += a_stride;
      }

      counter[0] = a_size;
      j = 0;
      while ((counter[j] == src_extent[j]) && (j < src_rank)) {
	array_p += src_offset[j];
	counter[j+1]++;
	counter[j] = 0;
	j ++;
      }
    }

  } else if (typ_sz == sizeof(r16) && ALIGNED_r16(array_p) &&  ALIGNED_r16(result_p) ) {

    while (counter[src_rank] < src_extent[src_rank] ) {
      for ( i = 0; i < a_size ; i ++ ) {
	*(ui16 *)result_p = *(ui16 *)array_p;
	result_p += r_stride;
	array_p += a_stride;
      }

      counter[0] = a_size;
      j = 0;
      while ((counter[j] == src_extent[j]) && (j < src_rank)) {
	array_p += src_offset[j];
	counter[j+1]++;
	counter[j] = 0;
	j ++;
      }
    }

  } else {
    while (counter[src_rank] < src_extent[src_rank] ) {
      for ( i = 0; i < a_size ; i ++ ) {
	char *ap = array_p;
	char *rp = result_p;
	if (typ_sz > BIGDEFAULTSZ)
	  (void) memcpy (rp, ap, typ_sz);
	else
	  for (j = 0; j < typ_sz ; j ++)  *rp++ = *ap ++ ;
	result_p += r_stride;
	array_p += a_stride;
      }

      counter[0] = a_size;
      j = 0;
      while ((counter[j] == src_extent[j]) && (j < src_rank)) {
	array_p += src_offset[j];
	counter[j+1]++;
	counter[j] = 0;
	j ++;
      }
    }

  }
}
示例#4
0
/*
 * Copy contiguous array "source" to array section "dest". Modeled after
 * "pack".
 */
void 
_Copyout(
DopeVectorType	*dest,
void	*source)
{
  char *source_p = (char *) source;

  size_t dest_extent[MAX_NARY_DIMS];
  size_t dest_stride[MAX_NARY_DIMS];
  size_t dest_offset[MAX_NARY_DIMS];
  size_t counter[MAX_NARY_DIMS];

  size_t i;
  int32_t j;
  int32_t dest_rank = GET_RANK_FROM_DESC(dest) - 1;

  size_t typ_sz = GET_ELEMENT_SZ_FROM_DESC(dest);

  int8_t  zero_szd_dest = FALSE;

  size_t dest_size = 1;

  for ( j = 0; j <= dest_rank; j ++  ) {
    dest_extent[j] = GET_EXTENT_FROM_DESC(dest,j);
    dest_stride[j] = GET_STRIDE_FROM_DESC(dest,j);
    dest_size *= dest_extent[j];
    counter[j] = 0;
    zero_szd_dest = zero_szd_dest || (dest_extent[j] == 0);
  }

  for ( j = 1; j <= dest_rank; j ++  )
    dest_offset[j-1] = dest_stride[j] - (dest_stride [j-1] * (dest_extent[j-1]));

  int8_t byte_aligned  = GET_BYTEALIGNED_FROM_DESC(dest);
  size_t tot_ext       = dest_size;

  if (zero_szd_dest)
    return;

  size_t a_size   = dest_extent[0];
  size_t a_stride = dest_stride[0];
  size_t s_stride = typ_sz;
  char *dest_p = GET_ADDRESS_FROM_DESC(dest);

  if (typ_sz == sizeof(i1) && ALIGNED_i1(dest_p) &&  ALIGNED_i1(source_p)) {

    while (counter[dest_rank] < dest_extent[dest_rank] ) {
      for ( i = 0; i < a_size; i ++ ) {
	*(i1 *)dest_p = *(i1 *)source_p;
	source_p += s_stride;
	dest_p += a_stride;
      }

      counter[0] = a_size;
      j = 0;
      while ((counter[j] == dest_extent[j]) && (j < dest_rank)) {
	dest_p += dest_offset[j];
	counter[j+1]++;
	counter[j] = 0;
	j ++;
      }
    }

  } else if (typ_sz == sizeof(i2) && ALIGNED_i2(dest_p) &&  ALIGNED_i2(source_p) ) {

    while (counter[dest_rank] < dest_extent[dest_rank] ) {
      for ( i = 0; i < a_size; i ++ ) {
	*(i2 *)dest_p = *(i2 *)source_p;
	source_p += s_stride;
	dest_p += a_stride;
      }

      counter[0] = a_size;
      j = 0;
      while ((counter[j] == dest_extent[j]) && (j < dest_rank)) {
	dest_p += dest_offset[j];
	counter[j+1]++;
	counter[j] = 0;
	j ++;
      }
    }

  } else if (typ_sz == sizeof(r4) && ALIGNED_r4(dest_p) &&  ALIGNED_r4(source_p) ) {

    while (counter[dest_rank] < dest_extent[dest_rank] ) {
      for ( i = 0; i < a_size; i ++ ) {
	*(ui4 *)dest_p = *(ui4 *)source_p;
	source_p += s_stride;
	dest_p += a_stride;
      }

      counter[0] = a_size;
      j = 0;
      while ((counter[j] == dest_extent[j]) && (j < dest_rank)) {
	dest_p += dest_offset[j];
	counter[j+1]++;
	counter[j] = 0;
	j ++;
      }
    }

  } else if (typ_sz == sizeof(r8) && ALIGNED_r8(dest_p) &&  ALIGNED_r8(source_p) ) {

    while (counter[dest_rank] < dest_extent[dest_rank] ) {
      for ( i = 0; i < a_size; i ++ ) {
	*(ui8 *)dest_p = *(ui8 *)source_p;
	source_p += s_stride;
	dest_p += a_stride;
      }

      counter[0] = a_size;
      j = 0;
      while ((counter[j] == dest_extent[j]) && (j < dest_rank)) {
	dest_p += dest_offset[j];
	counter[j+1]++;
	counter[j] = 0;
	j ++;
      }
    }

  } else if (typ_sz == sizeof(r16) && ALIGNED_r16(dest_p) &&  ALIGNED_r16(source_p) ) {

    while (counter[dest_rank] < dest_extent[dest_rank] ) {
      for ( i = 0; i < a_size; i ++ ) {
	*(ui16 *)dest_p = *(ui16 *)source_p;
	source_p += s_stride;
	dest_p += a_stride;
      }

      counter[0] = a_size;
      j = 0;
      while ((counter[j] == dest_extent[j]) && (j < dest_rank)) {
	dest_p += dest_offset[j];
	counter[j+1]++;
	counter[j] = 0;
	j ++;
      }
    }

  } else {
    while (counter[dest_rank] < dest_extent[dest_rank] ) {
      for ( i = 0; i < a_size; i ++ ) {
	char *ap = dest_p;
	char *sp = source_p;
	if (typ_sz > BIGDEFAULTSZ)
	  (void) memcpy (ap, sp, typ_sz);
	else
	  for (j = 0; j < typ_sz; j ++)  *ap++ = *sp++;
	source_p += s_stride;
	dest_p += a_stride;
      }

      counter[0] = a_size;
      j = 0;
      while ((counter[j] == dest_extent[j]) && (j < dest_rank)) {
	dest_p += dest_offset[j];
	counter[j+1]++;
	counter[j] = 0;
	j ++;
      }
    }

  }
}
示例#5
0
/*
 * _TRANSFER -- runtime worker for the Fortran TRANSFER intrinsic.
 *
 * Walks the elements of `array` in element order and copies their bytes
 * into the storage described by `result`, re-chunking the byte stream into
 * result elements whose size is the element size of `mold`.
 *
 *   result  Destination descriptor.  When it is not yet associated, a
 *           contiguous buffer is malloc'ed here and the descriptor
 *           (bounds, extent, stride multiplier, address, original base
 *           and size) is filled in.
 *   array   Source descriptor.  Rank 0 is treated as one element.
 *   mold    Supplies the result element size and the byte-alignment flag.
 *   size    Optional number of result elements; NULL when absent.
 *
 * Number of result elements:
 *   - size present : *size, or 0 when *size <= 0;
 *   - mold rank 0  : 1;
 *   - otherwise    : source byte count divided by the mold element size,
 *                    rounded up (0 when the mold element size is 0).
 *
 * Copying stops as soon as the result is full or the source is exhausted;
 * any result bytes beyond the end of the source are left untouched.
 * Allocation failure is reported through ERROR(_LELVL_ABORT, FENOMEMY).
 *
 * Strides obtained from the descriptors are used as byte increments on
 * char pointers.
 */
void 
_TRANSFER(
DopeVectorType	*result,
DopeVectorType	*array,
DopeVectorType	*mold,
i4 *size)
{
  char  * result_p, * result_b ;
  char  * array_p ;
  char  * rp, * ap ;

  size_t src_extent [MAX_NARY_DIMS] ;
  size_t src_stride [MAX_NARY_DIMS] ;
  size_t src_offset [MAX_NARY_DIMS] ;
  size_t counter[MAX_NARY_DIMS] ;

  int32_t j ;
  int32_t res_rank ;
  int32_t src_rank = GET_RANK_FROM_DESC(array) - 1;

  size_t typ_sz   = GET_ELEMENT_SZ_FROM_DESC(array);

  size_t a_size, a_stride, r_stride, i, k ;
  size_t todo_s, todo_r ;
  int8_t  zero_szd_source = FALSE;
  int8_t  byte_aligned = FALSE;

  size_t  tot_ext ;
  size_t  str_sz  ;
  size_t  src_size ;
  size_t  res_sz;
  size_t  xfer_sz;
  size_t  tot_sz;

  byte_aligned = GET_BYTEALIGNED_FROM_DESC(mold) ;

  /* Defaults describe a rank-0 source: a single element of typ_sz bytes. */
  src_extent[0] = 1;
  src_stride[0] = typ_sz;
  src_offset[0] = 0 ;
  counter[0]    = 0 ;
  src_size      = typ_sz;

  /* Load the source shape; src_size becomes the total source byte count. */
  for ( j = 0 ; j <= src_rank ; j ++  ) {
    src_extent[j]  = GET_EXTENT_FROM_DESC(array,j) ;
    src_stride[j]  = GET_STRIDE_FROM_DESC(array,j) ;
    counter[j] = 0 ;
    zero_szd_source = zero_szd_source || (src_extent[j] == 0) ;
    src_size *= src_extent[j];
  }

  /* src_offset[d]: correction applied when dimension d wraps, so that the
   * pointer lands on the first element of the next line of dimension d+1. */
  for ( j = 1 ; j <= src_rank ; j ++  )
    src_offset[j-1] = src_stride[j] - (src_stride [j-1] * (src_extent[j-1]))  ;

  res_sz   = GET_ELEMENT_SZ_FROM_DESC(mold);
  res_rank = GET_RANK_FROM_DESC(result);

  /* Decide how many result elements (tot_ext) and bytes (tot_sz) to fill. */
  if (size) {
    if (*size > 0)
      tot_ext = * size ;
    else {
      zero_szd_source = TRUE;
      tot_ext = 0 ;
    }
    tot_sz = tot_ext * res_sz ;

  } else if (GET_RANK_FROM_DESC(mold) == 0) {
    tot_ext = 1 ;
    tot_sz  = res_sz ;

  } else if (res_sz == 0) {
    /* Array MOLD with zero-length elements: nothing can be stored, and
     * dividing by res_sz below would be a division by zero. */
    tot_ext = 0 ;
    tot_sz  = 0 ;

  } else {
    tot_sz  = src_size ;
    tot_ext = tot_sz/res_sz ;
    if (tot_sz%res_sz)
      tot_ext ++ ;
  }

  if (!GET_ASSOCIATED_FROM_DESC(result)) {

    size_t  nbytes  ;
    char    *p      ;

    SET_ADDRESS_IN_DESC(result,NULL);
    SET_ORIG_BS_IN_DESC(result,NULL) ;
    SET_ORIG_SZ_IN_DESC(result,0) ;

    p = NULL ;
    nbytes  = tot_ext * res_sz ;
    str_sz  = MK_STRIDE(byte_aligned,res_sz);

    if (res_rank > 0) {
      SET_LBOUND_IN_DESC(result,0,1);
      SET_EXTENT_IN_DESC(result,0,tot_ext);
      SET_STRMULT_IN_DESC(result,0, str_sz );
    }

    /* A zero-byte result keeps a NULL address. */
    if (nbytes > 0 ) {
      p = (void *) malloc (nbytes);
      if (p == NULL)
        ERROR(_LELVL_ABORT, FENOMEMY);

      SET_ADDRESS_IN_DESC(result,p);
    }

    SET_ASSOCIATED_IN_DESC(result);
    SET_CONTIG_IN_DESC(result);
    if (GET_DV_ASCII_FROM_DESC(result)) {
      SET_CHARPTR_IN_DESC(result,p,res_sz << 3);
    }
    SET_ORIG_BS_IN_DESC(result,p) ;
    SET_ORIG_SZ_IN_DESC(result,nbytes * 8 ) ;
  }

  /* A rank-0 result has no stride of its own; step by its element size. */
  r_stride = res_sz ;
  if (res_rank > 0)
    r_stride = GET_STRIDE_FROM_DESC(result,0) ;

  /* From here on a rank-0 source is walked as a one-element line. */
  if (src_rank < 0) src_rank ++ ;

  /* Nothing to copy for an empty source or an empty result. */
  if (zero_szd_source || tot_sz == 0)
    return ;

  a_size   = src_extent[0] ;
  a_stride = src_stride[0] ;
  array_p  = GET_ADDRESS_FROM_DESC(array);
  result_b = GET_ADDRESS_FROM_DESC(result);
  result_p = result_b ;

  k = 0 ;                 /* bytes stored in the result so far */

  /* Bytes still missing from the current result element.  This must
   * survive from one source line to the next: a result element may be
   * completed by the first source element of the following line. */
  todo_r = res_sz ;

  while (counter[src_rank] < src_extent[src_rank] ) {

    for ( i = 0 ; i < a_size ; i ++ ) {

      ap = array_p ;
      rp = result_p ;
      todo_s = typ_sz ;     /* bytes of this source element still to copy */

      while (todo_s != 0) {
        xfer_sz = todo_s ;
        if (xfer_sz > todo_r) xfer_sz = todo_r ;
        for (j = 0 ; j < xfer_sz ; j ++)  *rp++ = *ap ++ ;

        todo_r -= xfer_sz ;
        todo_s -= xfer_sz ;

        if (todo_r != 0)
          result_p += xfer_sz ;        /* same result element, further in */
        else {
          result_b += r_stride ;       /* next result element */
          result_p  = result_b ;
          rp        = result_p ;       /* keep the write cursor in step when
                                          the stride is not the element size */
          todo_r    = res_sz ;
        }
        k += xfer_sz ;
        if (k >= tot_sz)
          return ;
      }
      array_p += a_stride ;
    }

    /* The innermost dimension is done; propagate the carry outwards. */
    counter[0] = a_size  ;
    j = 0 ;
    while ((counter[j] == src_extent[j]) && (j < src_rank)) {
      array_p += src_offset[j] ;
      counter[j+1]++ ;
      counter[j] = 0 ;
      j ++ ;
    }
  }
}
示例#6
0
/*
 * _SPREAD -- runtime worker for the Fortran SPREAD intrinsic.
 *
 * Builds a result of rank (source rank + 1) by replicating `array`
 * *ncopies times along the new dimension *dim (1-based).
 *
 *   result   Destination descriptor.  When it is not yet associated, a
 *            contiguous buffer is malloc'ed here and the descriptor is
 *            filled in; otherwise its existing strides/extents are used.
 *   array    Source descriptor (rank 0 is treated as one element).
 *   dim      1-based position of the new dimension; a value outside
 *            1 .. source rank + 1 is reported through
 *            ERROR(_LELVL_ABORT, FESCIDIM).
 *   ncopies  Number of copies; negative values are treated as 0.
 *
 * Allocation failure is reported through ERROR(_LELVL_ABORT, FENOMEMY).
 * Strides obtained from the descriptors are used as byte increments on
 * char pointers.
 */
void 
_SPREAD(
DopeVectorType	*result,
DopeVectorType	*array,
i4 *dim,
i4 *ncopies)
{
  char  * result_p ;
  char  * array_p ;

  size_t src_extent [MAX_NARY_DIMS] ;
  size_t src_stride [MAX_NARY_DIMS] ;
  size_t src_offset [MAX_NARY_DIMS] ;
  size_t counter[MAX_NARY_DIMS] ;

  size_t res_stride [MAX_NARY_DIMS] ;
  size_t res_extent [MAX_NARY_DIMS] ;
  size_t res_offset [MAX_NARY_DIMS] ;

  int32_t j ;
  char *rp, *ap ;
  int32_t res_rank ;
  int32_t src_rank = GET_RANK_FROM_DESC(array) - 1;

  size_t typ_sz   = GET_ELEMENT_SZ_FROM_DESC(array);

  size_t a_size, a_stride, r_stride, c_stride, i, k ;
  int8_t  zero_szd_source = FALSE;
  int8_t  byte_aligned = FALSE;

  int32_t ddim ;
  int32_t nc ;

  size_t  tot_ext ;
  size_t  str_sz  ;

  ddim = (*dim) - 1 ;

  if ((ddim > src_rank + 1) || (ddim < 0))
    ERROR(_LELVL_ABORT,FESCIDIM);

  nc = * ncopies ;
  if (nc < 0) nc = 0 ;

  /* Defaults describe a rank-0 source: one element, never advanced. */
  src_extent[0]  = 1;
  src_stride[0]  = 0;
  src_offset[0]  = 0;

  for ( j = 0 ; j <= src_rank ; j ++  )  {
    src_extent[j]  = GET_EXTENT_FROM_DESC(array,j) ;
    src_stride[j]  = GET_STRIDE_FROM_DESC(array,j) ;
    /* Remember whether any dimension is empty so that the copy loops,
     * which assume non-zero extents, can be skipped altogether. */
    zero_szd_source = zero_szd_source || (src_extent[j] == 0) ;
  }

  /* src_offset[d]: correction applied when dimension d wraps. */
  for ( j = 1 ; j <= src_rank ; j ++  ) {
    src_offset[j-1] = src_stride[j] - (src_stride [j-1] * (src_extent[j-1]))  ;
  }

  res_rank = src_rank + 2 ;
  if (src_rank < 0 )
    src_rank = 0 ;

  byte_aligned = GET_BYTEALIGNED_FROM_DESC(result);

  if (!GET_ASSOCIATED_FROM_DESC(result)) {

    size_t  nbytes  ;
    char    *p      ;

    SET_ADDRESS_IN_DESC(result,NULL);
    SET_ORIG_BS_IN_DESC(result,NULL) ;
    SET_ORIG_SZ_IN_DESC(result,0) ;

    p = NULL ;
    tot_ext = 1 ;
    nbytes  = typ_sz ;

    str_sz = MK_STRIDE(byte_aligned,typ_sz);

    /* Result shape = source shape with extent nc inserted at ddim. */
    for ( i = 0 , j = 0 ; i < res_rank ; i ++) {
      size_t ex ;
      SET_LBOUND_IN_DESC(result,i,1);

      if (i != ddim ) {
        ex = src_extent[j];
        j ++ ;
      } else {
        ex = nc ;
      }
      SET_EXTENT_IN_DESC(result,i,ex);
      SET_STRMULT_IN_DESC(result,i,tot_ext * str_sz );
      tot_ext *= ex;
      nbytes  *= ex;
    }

    /* A zero-byte result keeps a NULL address. */
    if (nbytes > 0) {
      p = (void *) malloc (nbytes);
      if (p == NULL)
        ERROR(_LELVL_ABORT, FENOMEMY);

      SET_ADDRESS_IN_DESC(result,p);
    }

    SET_CONTIG_IN_DESC(result);
    SET_ASSOCIATED_IN_DESC(result);
    if (GET_DV_ASCII_FROM_DESC(array)) {
      SET_CHARPTR_IN_DESC(result,p,typ_sz);
    }
    SET_ORIG_BS_IN_DESC(result,p) ;
    SET_ORIG_SZ_IN_DESC(result,nbytes * 8) ;
  }

  /* Reorder the result dimensions so that slots 0 .. res_rank-2 line up
   * with the source dimensions and the last slot holds the copy dimension. */
  res_stride[res_rank-1] = GET_STRIDE_FROM_DESC(result,ddim) ;
  res_extent[res_rank-1] = GET_EXTENT_FROM_DESC(result,ddim) ;

  for ( j = 0 , k = 0; j < res_rank ; j ++  ) {
    if (j != ddim ) {
      res_stride[k]  = GET_STRIDE_FROM_DESC(result,j) ;
      res_extent[k]  = GET_EXTENT_FROM_DESC(result,j) ;
      k ++ ;
    }
    counter[j] = 0 ;
  }
  for ( j = 1 ;  j < res_rank ; j ++  )
    res_offset[j-1] = res_stride[j] - (res_stride [j-1] * (res_extent[j-1])) ;

  /* Empty source or no copies requested: the result holds no elements. */
  if (zero_szd_source || nc == 0)
    return ;

  a_size   = src_extent[0] ;
  a_stride = src_stride[0] ;
  r_stride = res_stride[0] ;
  c_stride = res_stride[res_rank-1] ;   /* distance between two copies */
  array_p = GET_ADDRESS_FROM_DESC(array);
  result_p = GET_ADDRESS_FROM_DESC(result);

  /*
   * One specialised loop nest per element size so that aligned elements
   * are moved with a single load/store; everything else is copied
   * bytewise.  Each nest copies one source line (dimension 0), storing
   * every element nc times along the copy dimension, and then carries
   * into the outer dimensions.
   */
  if (typ_sz == sizeof(i1) && ALIGNED_i1(array_p) &&  ALIGNED_i1(result_p)) {

    while (counter[src_rank] < src_extent[src_rank] ) {
      for ( i = 0 ; i < a_size ; i ++ ) {
        char * rp1 = result_p ;
        for (k = 0 ; k < nc ; k ++ ) {
          *(i1 *)rp1 = *(i1 *)array_p ;
          rp1 += c_stride ;
        }
        result_p += r_stride ;
        array_p += a_stride ;
      }

      counter[0] = a_size  ;
      j = 0 ;
      while ((counter[j] == src_extent[j]) && (j < src_rank)) {
        array_p += src_offset[j] ;
        result_p += res_offset[j] ;
        counter[j+1]++ ;
        counter[j] = 0 ;
        j ++ ;
      }
    }

  } else if (typ_sz == sizeof(i2) && ALIGNED_i2(array_p) &&  ALIGNED_i2(result_p) ) {

    while (counter[src_rank] < src_extent[src_rank] ) {
      for ( i = 0 ; i < a_size ; i ++ ) {
        char * rp1 = result_p ;
        for (k = 0 ; k < nc ; k ++ ) {
          *(i2 *)rp1 = *(i2 *)array_p ;
          rp1 += c_stride ;
        }
        result_p += r_stride ;
        array_p += a_stride ;
      }

      counter[0] = a_size  ;
      j = 0 ;
      while ((counter[j] == src_extent[j]) && (j < src_rank)) {
        array_p += src_offset[j] ;
        result_p += res_offset[j] ;
        counter[j+1]++ ;
        counter[j] = 0 ;
        j ++ ;
      }
    }

  } else if (typ_sz == sizeof(r4) && ALIGNED_r4(array_p) &&  ALIGNED_r4(result_p) ) {

    while (counter[src_rank] < src_extent[src_rank] ) {
      for ( i = 0 ; i < a_size ; i ++ ) {
        char * rp1 = result_p ;
        for (k = 0 ; k < nc ; k ++ ) {
#ifdef KEY /* bug 8062 */
          /* Using IEEE FP on non-FP data might change bits during assign */
          *(ui4 *)rp1 = *(ui4 *)array_p ;
#else /* KEY bug 8062 */
          *(r4 *)rp1 = *(r4 *)array_p ;
#endif /* KEY bug 8062 */
          rp1 += c_stride ;
        }
        result_p += r_stride ;
        array_p += a_stride ;
      }

      counter[0] = a_size  ;
      j = 0 ;
      while ((counter[j] == src_extent[j]) && (j < src_rank)) {
        array_p += src_offset[j] ;
        result_p += res_offset[j] ;
        counter[j+1]++ ;
        counter[j] = 0 ;
        j ++ ;
      }
    }

  } else if (typ_sz == sizeof(r8) && ALIGNED_r8(array_p) &&  ALIGNED_r8(result_p) ) {

    while (counter[src_rank] < src_extent[src_rank] ) {
      for ( i = 0 ; i < a_size ; i ++ ) {
        char * rp1 = result_p ;
        for (k = 0 ; k < nc ; k ++ ) {
#ifdef KEY /* bug 8062 */
          /* Using IEEE FP on non-FP data might change bits during assign */
          *(ui8 *)rp1 = *(ui8 *)array_p ;
#else /* KEY bug 8062 */
          *(r8 *)rp1 = *(r8 *)array_p ;
#endif /* KEY bug 8062 */
          rp1 += c_stride ;
        }
        result_p += r_stride ;
        array_p += a_stride ;
      }

      counter[0] = a_size  ;
      j = 0 ;
      while ((counter[j] == src_extent[j]) && (j < src_rank)) {
        array_p += src_offset[j] ;
        result_p += res_offset[j] ;
        counter[j+1]++ ;
        counter[j] = 0 ;
        j ++ ;
      }
    }

  } else if (typ_sz == sizeof(r16) && ALIGNED_r16(array_p) &&  ALIGNED_r16(result_p) ) {

    while (counter[src_rank] < src_extent[src_rank] ) {
      for ( i = 0 ; i < a_size ; i ++ ) {
        char * rp1 = result_p ;
        for (k = 0 ; k < nc ; k ++ ) {
#ifdef KEY /* Bug 4039 */
          *(ui16 *)rp1 = *(ui16 *)array_p ;
#else /* KEY Bug 4039 */
          *(r16 *)rp1 = *(r16 *)array_p ;
#endif /* KEY Bug 4039 */
          rp1 += c_stride ;
        }
        result_p += r_stride ;
        array_p += a_stride ;
      }

      counter[0] = a_size  ;
      j = 0 ;
      while ((counter[j] == src_extent[j]) && (j < src_rank)) {
        array_p += src_offset[j] ;
        result_p += res_offset[j] ;
        counter[j+1]++ ;
        counter[j] = 0 ;
        j ++ ;
      }
    }

  } else {

    /* Generic element size or unaligned data: copy byte by byte, or with
     * memcpy once the element is larger than BIGDEFAULTSZ. */
    while (counter[src_rank] < src_extent[src_rank] ) {
      for ( i = 0 ; i < a_size ; i ++ ) {
        char * rp1 = result_p ;
        for (k = 0 ; k < nc ; k ++ ) {
          rp = rp1 ;
          ap  = array_p ;
          if (typ_sz > BIGDEFAULTSZ)
            (void) memcpy (rp, ap, typ_sz);
          else
            for (j = 0 ; j < typ_sz ; j ++)  *rp++ = *ap ++ ;
          rp1 += c_stride ;
        }
        result_p += r_stride ;
        array_p += a_stride ;
      }

      counter[0] = a_size  ;
      j = 0 ;
      while ((counter[j] == src_extent[j]) && (j < src_rank)) {
        array_p += src_offset[j] ;
        result_p += res_offset[j] ;
        counter[j+1]++ ;
        counter[j] = 0 ;
        j ++ ;
      }
    }
  }
}
示例#7
0
void 
_EOSHIFT(
DopeVectorType	*result,
DopeVectorType	*array,
DopeVectorType	*shift,
DopeVectorType	*boundary,
i4 *dim)
{
  char  * result_p, * result_b ;
  char  * array_p, * array_b ;
  char * shift_p, * shift_b ;
  char * boundary_p, * boundary_b ;

  size_t src_extent [MAX_NARY_DIMS] ;
  size_t src_stride [MAX_NARY_DIMS] ;
  size_t src_offset [MAX_NARY_DIMS] ;
  size_t counter[MAX_NARY_DIMS] ;

  size_t res_stride [MAX_NARY_DIMS] ;
  size_t res_extent [MAX_NARY_DIMS] ;
  size_t res_offset [MAX_NARY_DIMS] ;

  int32_t j,ii;
  char *rp, *ap ;
  int32_t res_rank ;
  int32_t shf_rank ;
  int32_t src_rank = GET_RANK_FROM_DESC(array) - 1;

  size_t typ_sz   = GET_ELEMENT_SZ_FROM_DESC(array);

  size_t a_size,a_stride,r_stride, i,k ;
  int8_t  zero_szd_source = FALSE;
  int8_t  byte_aligned = FALSE;

  size_t  r_offs1,r_offs2 ;
  size_t  a_offs,a_bump,r_bump ;
  size_t ll1,ll2;
  int64_t shft,shf_typ_sz  ;
  char  *rp1,  *ap1, *ap2  ;
  int32_t ddim ;

  size_t shf_stride [MAX_NARY_DIMS] ;
  size_t shf_offset [MAX_NARY_DIMS] ;
  size_t fill_stride [MAX_NARY_DIMS] ;
  size_t fill_offset [MAX_NARY_DIMS] ;
  size_t f_stride ;
  int32_t fill_rank  ;
  size_t  num_trues ;
  int32_t local_alloc ;
  size_t  tot_ext ;
  size_t  str_sz  ;

  size_t src_size ;

  size_t  res_sz;
  size_t  xfer_sz;
  size_t  tot_sz;

  int8_t computed_shift = FALSE ;
  const  r16  zero_fill = 0;
  const  char zero_sp[17] = "                " ;

  ddim = (*dim) - 1 ;

  if ((ddim > src_rank) || (ddim < 0))
    ERROR(_LELVL_ABORT,FESCIDIM);

  src_extent[0] = GET_EXTENT_FROM_DESC(array,ddim) ;
  src_stride[0] = GET_STRIDE_FROM_DESC(array,ddim) ;
  byte_aligned  = GET_BYTEALIGNED_FROM_DESC(array) ;

  for ( j = 0, k = 1 ; j <= src_rank ; j ++  ) {
    if (j != ddim ) {
      src_extent[k] = GET_EXTENT_FROM_DESC(array,j) ;
      src_stride[k] = GET_STRIDE_FROM_DESC(array,j) ;
      src_offset[k-1] = src_stride[k] - (src_stride [k-1] * (src_extent[k-1]))  ;
      k++ ;
    }
    counter[j] = 0 ;
    fill_offset[j] = 0 ;
    shf_offset[j] = 0 ;
    zero_szd_source = zero_szd_source || (src_extent[j] == 0) ;
  }

  if (!GET_ASSOCIATED_FROM_DESC(result)) {

    size_t  nbytes  ;
    size_t  ext  ;
    char    *p      ;

    SET_ADDRESS_IN_DESC(result,NULL);
    SET_ORIG_BS_IN_DESC(result,NULL) ;
    SET_ORIG_SZ_IN_DESC(result,0) ;

    p = NULL ;
    tot_ext = 1 ;
    nbytes  = typ_sz ;
    str_sz  = MK_STRIDE(byte_aligned,typ_sz);

    for ( i = 0 ; i <= src_rank ; i ++) {
      ext = GET_EXTENT_FROM_DESC(array,i) ;
      SET_LBOUND_IN_DESC(result,i,1);
      SET_EXTENT_IN_DESC(result,i,ext);
      SET_STRMULT_IN_DESC(result,i,tot_ext * str_sz );
      tot_ext *= ext;
      nbytes  *= ext;
    }

    if (nbytes > 0) {
      p = (void *) malloc (nbytes);
      if (p == NULL)
	ERROR(_LELVL_ABORT, FENOMEMY);

      SET_ADDRESS_IN_DESC(result,p);
    }

    SET_ASSOCIATED_IN_DESC(result);
    SET_CONTIG_IN_DESC(result);
    if (GET_DV_ASCII_FROM_DESC(array)) {
      SET_CHARPTR_IN_DESC(result,p,typ_sz);
    }
    SET_ORIG_BS_IN_DESC(result,p) ;
    SET_ORIG_SZ_IN_DESC(result,nbytes * 8 ) ;
  }

  res_stride[0] = GET_STRIDE_FROM_DESC(result,ddim) ;

  for ( j = 0, k = 1  ; j <= src_rank ; j ++  ) {
    if (j != ddim ) {
      res_stride[k] = GET_STRIDE_FROM_DESC(result,j) ;
      res_offset[k-1] = res_stride[k] - (res_stride [k-1] * (src_extent[k-1])) ;
      k++ ;
    }
  }

  shf_typ_sz = GET_ELEMENT_SZ_FROM_DESC(shift);
  shf_rank   = GET_RANK_FROM_DESC(shift);
  shift_p    = GET_ADDRESS_FROM_DESC(shift);

  shf_stride[0] = 0  ;
  for ( j = 0 ; j < shf_rank ; j ++  ) {
    shf_stride[j] = GET_STRIDE_FROM_DESC(shift,j) ;
  }

  for ( j = 1 ; j < shf_rank ; j ++  ) {
    shf_offset[j] = shf_stride[j] - (shf_stride [j-1] * (src_extent[j])) ;
  }

  if (boundary != NULL) {
    boundary_p = GET_ADDRESS_FROM_DESC(boundary);
    fill_rank = GET_RANK_FROM_DESC(boundary) ;

    fill_stride[0] = 0;
    for ( j = 0 ; j < fill_rank ; j ++  ) {
      fill_stride[j] = GET_STRIDE_FROM_DESC(boundary,j) ;
    }

    fill_offset[0] = 0 ;
    for ( j = 1 ; j < fill_rank ; j ++  )  {
      fill_offset[j] = fill_stride[j] - (fill_stride [j-1] * (src_extent[j]))  ;
    }

  } else {

    for ( j = 0 ; j <= src_rank ; j ++  ) {
      fill_stride[j] = 0 ;
      fill_offset[0] = 0 ;
    }
    if (GET_DV_ASCII_FROM_DESC(array))
      boundary_p = (char *) & zero_sp;
    else 
      boundary_p = (char *) & zero_fill;
  }
  a_bump = src_extent[0] * src_stride[0] ;
  r_bump = src_extent[0] * res_stride[0] ;

  if (zero_szd_source)
    return ;

  a_size   = src_extent[0] ;
  a_stride = src_stride[0] ;
  r_stride = res_stride[0] ;
  f_stride = fill_stride[0] ;
  array_p = GET_ADDRESS_FROM_DESC(array);
  result_p = GET_ADDRESS_FROM_DESC(result);

  if (typ_sz == sizeof(i1) && ALIGNED_i1(array_p) &&  ALIGNED_i1(result_p) &&  ALIGNED_i1(boundary_p)) {

    while (counter[src_rank] < src_extent[src_rank] ) {
      i1 lfill = 0 ;

      if (!computed_shift) {
	switch (shf_typ_sz) {
	case sizeof(i1): 
	  shft = * (i1 *)shift_p ; 
	  break ;
	case sizeof(i2): 
	  shft = * (i2 *)shift_p ; 
	  break ;
	case sizeof(i4): 
	  shft = * (i4 *)shift_p ; 
	  break ;
	case sizeof(i8): 
	  shft = * (i8 *)shift_p ; 
	  break ;
	}
	shift_p += shf_stride[0];

	if (shft < 0 ) {
	  if (shft <  (-(int64_t) src_extent[0]))
	    shft = (-(int64_t) src_extent[0]);

	  ll1 = ((int64_t)src_extent[0]) - abs(shft)  ;
	  ll2 = abs(shft) ;
	  r_offs1  = res_stride[0] * ll2 ;
	  r_offs2  = 0 ;
	  a_offs   = 0 ;

	} else {
	  if (shft > (int64_t)src_extent[0])
	    shft = (int64_t)src_extent[0] ;
	  ll1 = ((int64_t)src_extent[0]) -  shft ;
	  ll2 = shft ;
	  r_offs1  = 0 ;
	  r_offs2  = res_stride[0] * ll1 ;
	  a_offs   = a_stride * ll2 ;
	}
	if (shf_rank == 0 )
	  computed_shift = TRUE;
      }
      ap1 = array_p + a_offs ;
      rp1 = result_p + r_offs1;

      for ( k = 0 ; k < ll1 ; k ++ )  {
	*(i1 *)rp1 = *(i1 *)ap1 ;
	rp1 += r_stride ;
	ap1 += a_stride ;
      }

      rp1 = result_p + r_offs2 ;
      lfill = *(i1 *) boundary_p ;

      for ( k = 0 ; k < ll2 ; k ++ ) {
	*(i1 *)rp1 = lfill ;
	rp1 += r_stride ;
      }

      array_p += a_bump ;
      result_p += r_bump ;
      boundary_p += f_stride ;

      counter[0] = a_size  ;
      j = 0 ;
      while ((counter[j] == src_extent[j]) && (j < src_rank)) {
	array_p += src_offset[j] ;
	result_p += res_offset[j] ;
	shift_p  += shf_offset[j] ;
	boundary_p   += fill_offset[j] ;
	counter[j+1]++ ;
	counter[j] = 0 ;
	j ++ ;
      }

    }
  } else if (typ_sz == sizeof(i2) && ALIGNED_i2(array_p) &&  ALIGNED_i2(result_p) && ALIGNED_i2(boundary_p)) {

    while (counter[src_rank] < src_extent[src_rank] ) {
      i2 lfill = 0 ;

      if (!computed_shift) {
	switch (shf_typ_sz) {
	case sizeof(i1): 
	  shft = * (i1 *)shift_p ; 
	  break ;
	case sizeof(i2): 
	  shft = * (i2 *)shift_p ; 
	  break ;
	case sizeof(i4): 
	  shft = * (i4 *)shift_p ; 
	  break ;
	case sizeof(i8): 
	  shft = * (i8 *)shift_p ; 
	  break ;
	}
	shift_p += shf_stride[0];

	if (shft < 0 ) {
	  if (shft <  (-(int64_t) src_extent[0]))
	    shft = (-(int64_t) src_extent[0]);

	  ll1 = ((int64_t)src_extent[0]) - abs(shft)  ;
	  ll2 = abs(shft) ;
	  r_offs1  = res_stride[0] * ll2 ;
	  r_offs2  = 0 ;
	  a_offs   = 0 ;

	} else {
	  if (shft > (int64_t)src_extent[0])
	    shft = (int64_t)src_extent[0] ;
	  ll1 = ((int64_t)src_extent[0]) -  shft ;
	  ll2 = shft ;
	  r_offs1  = 0 ;
	  r_offs2  = res_stride[0] * ll1 ;
	  a_offs   = a_stride * ll2 ;
	}
	if (shf_rank == 0 )
	  computed_shift = TRUE;
      }
      ap1 = array_p + a_offs ;
      rp1 = result_p + r_offs1;

      for ( k = 0 ; k < ll1 ; k ++ )  {
	*(i2 *)rp1 = *(i2 *)ap1 ;
	rp1 += r_stride ;
	ap1 += a_stride ;
      }

      rp1 = result_p + r_offs2 ;
      lfill = *(i2 *) boundary_p ;

      for ( k = 0 ; k < ll2 ; k ++ ) {
	*(i2 *)rp1 = lfill ;
	rp1 += r_stride ;
      }

      array_p += a_bump ;
      result_p += r_bump ;
      boundary_p += f_stride ;

      counter[0] = a_size  ;
      j = 0 ;
      while ((counter[j] == src_extent[j]) && (j < src_rank)) {
	array_p += src_offset[j] ;
	result_p += res_offset[j] ;
	shift_p  += shf_offset[j] ;
	boundary_p   += fill_offset[j] ;
	counter[j+1]++ ;
	counter[j] = 0 ;
	j ++ ;
      }

    }
  } else if (typ_sz == sizeof(r4) && ALIGNED_r4(array_p) &&  ALIGNED_r4(result_p) && ALIGNED_r4(boundary_p)) {

    while (counter[src_rank] < src_extent[src_rank] ) {
#ifdef KEY /* bug 8062 */
      /* Using IEEE FP on non-FP data might change bits during assign */
      ui4 lfill = 0 ;
#else /* KEY bug 8062 */
      r4 lfill = 0 ;
#endif /* KEY bug 8062 */

      if (!computed_shift) {
	switch (shf_typ_sz) {
	case sizeof(i1): 
	  shft = * (i1 *)shift_p ; 
	  break ;
	case sizeof(i2): 
	  shft = * (i2 *)shift_p ; 
	  break ;
	case sizeof(i4): 
	  shft = * (i4 *)shift_p ; 
	  break ;
	case sizeof(i8): 
	  shft = * (i8 *)shift_p ; 
	  break ;
	}
	shift_p += shf_stride[0];

	if (shft < 0 ) {
	  if (shft <  (-(int64_t) src_extent[0]))
	    shft = (-(int64_t) src_extent[0]);

	  ll1 = ((int64_t)src_extent[0]) - abs(shft)  ;
	  ll2 = abs(shft) ;
	  r_offs1  = res_stride[0] * ll2 ;
	  r_offs2  = 0 ;
	  a_offs   = 0 ;

	} else {
	  if (shft > (int64_t)src_extent[0])
	    shft = (int64_t)src_extent[0] ;
	  ll1 = ((int64_t)src_extent[0]) -  shft ;
	  ll2 = shft ;
	  r_offs1  = 0 ;
	  r_offs2  = res_stride[0] * ll1 ;
	  a_offs   = a_stride * ll2 ;
	}
	if (shf_rank == 0 )
	  computed_shift = TRUE;
      }
      ap1 = array_p + a_offs ;
      rp1 = result_p + r_offs1;

      for ( k = 0 ; k < ll1 ; k ++ )  {
#ifdef KEY /* bug 8062 */
	/* Using IEEE FP on non-FP data might change bits during assign */
	*(ui4 *)rp1 = *(ui4 *)ap1 ;
#else /* KEY bug 8062 */
	*(r4 *)rp1 = *(r4 *)ap1 ;
#endif /* KEY bug 8062 */
	rp1 += r_stride ;
	ap1 += a_stride ;
      }

      rp1 = result_p + r_offs2 ;
#ifdef KEY /* bug 8062 */
      /* Using IEEE FP on non-FP data might change bits during assign */
      lfill = *(ui4 *) boundary_p ;
#else /* KEY bug 8062 */
      lfill = *(r4 *) boundary_p ;
#endif /* KEY bug 8062 */

      for ( k = 0 ; k < ll2 ; k ++ ) {
#ifdef KEY /* bug 8062 */
	/* Using IEEE FP on non-FP data might change bits during assign */
	*(ui4 *)rp1 = lfill ;
#else /* KEY bug 8062 */
	*(r4 *)rp1 = lfill ;
#endif /* KEY bug 8062 */
	rp1 += r_stride ;
      }

      array_p += a_bump ;
      result_p += r_bump ;
      boundary_p += f_stride ;

      counter[0] = a_size  ;
      j = 0 ;
      while ((counter[j] == src_extent[j]) && (j < src_rank)) {
	array_p += src_offset[j] ;
	result_p += res_offset[j] ;
	shift_p  += shf_offset[j] ;
	boundary_p   += fill_offset[j] ;
	counter[j+1]++ ;
	counter[j] = 0 ;
	j ++ ;
      }

    }
  } else if (typ_sz == sizeof(r8) && ALIGNED_r8(array_p) &&  ALIGNED_r8(result_p) && ALIGNED_r8(boundary_p)) {

    while (counter[src_rank] < src_extent[src_rank] ) {
#ifdef KEY /* bug 8062 */
      /* Using IEEE FP on non-FP data might change bits during assign */
      ui8 lfill = 0 ;
#else /* KEY bug 8062 */
      r8 lfill = 0 ;
#endif /* KEY bug 8062 */

      if (!computed_shift) {
	switch (shf_typ_sz) {
	case sizeof(i1): 
	  shft = * (i1 *)shift_p ; 
	  break ;
	case sizeof(i2): 
	  shft = * (i2 *)shift_p ; 
	  break ;
	case sizeof(i4): 
	  shft = * (i4 *)shift_p ; 
	  break ;
	case sizeof(i8): 
	  shft = * (i8 *)shift_p ; 
	  break ;
	}
	shift_p += shf_stride[0];

	if (shft < 0 ) {
	  if (shft <  (-(int64_t) src_extent[0]))
	    shft = (-(int64_t) src_extent[0]);

	  ll1 = ((int64_t)src_extent[0]) - abs(shft)  ;
	  ll2 = abs(shft) ;
	  r_offs1  = res_stride[0] * ll2 ;
	  r_offs2  = 0 ;
	  a_offs   = 0 ;

	} else {
	  if (shft > (int64_t)src_extent[0])
	    shft = (int64_t)src_extent[0] ;
	  ll1 = ((int64_t)src_extent[0]) -  shft ;
	  ll2 = shft ;
	  r_offs1  = 0 ;
	  r_offs2  = res_stride[0] * ll1 ;
	  a_offs   = a_stride * ll2 ;
	}
	if (shf_rank == 0 )
	  computed_shift = TRUE;
      }
      ap1 = array_p + a_offs ;
      rp1 = result_p + r_offs1;

      for ( k = 0 ; k < ll1 ; k ++ )  {
#ifdef KEY /* bug 8062 */
	/* Using IEEE FP on non-FP data might change bits during assign */
	*(ui8 *)rp1 = *(ui8 *)ap1 ;
#else /* KEY bug 8062 */
	*(r8 *)rp1 = *(r8 *)ap1 ;
#endif /* KEY bug 8062 */
	rp1 += r_stride ;
	ap1 += a_stride ;
      }

      rp1 = result_p + r_offs2 ;
#ifdef KEY /* bug 8062 */
      /* Using IEEE FP on non-FP data might change bits during assign */
      lfill = *(ui8 *) boundary_p ;
#else /* KEY bug 8062 */
      lfill = *(r8 *) boundary_p ;
#endif /* KEY bug 8062 */

      for ( k = 0 ; k < ll2 ; k ++ ) {
#ifdef KEY /* bug 8062 */
	/* Using IEEE FP on non-FP data might change bits during assign */
	*(ui8 *)rp1 = lfill ;
#else /* KEY bug 8062 */
	*(r8 *)rp1 = lfill ;
#endif /* KEY bug 8062 */
	rp1 += r_stride ;
      }

      array_p += a_bump ;
      result_p += r_bump ;
      boundary_p += f_stride ;

      counter[0] = a_size  ;
      j = 0 ;
      while ((counter[j] == src_extent[j]) && (j < src_rank)) {
	array_p += src_offset[j] ;
	result_p += res_offset[j] ;
	shift_p  += shf_offset[j] ;
	boundary_p   += fill_offset[j] ;
	counter[j+1]++ ;
	counter[j] = 0 ;
	j ++ ;
      }

    }
  } else if (typ_sz == sizeof(r16) && ALIGNED_r16(array_p) &&  ALIGNED_r16(result_p) && ALIGNED_r16(boundary_p)) {

    while (counter[src_rank] < src_extent[src_rank] ) {
#ifdef KEY /* Bug 4039 */
      ui16 lfill = { 0, 0 } ;
#else /* KEY Bug 4039 */
      r16 lfill = 0 ;
#endif /* KEY Bug 4039 */

      if (!computed_shift) {
	switch (shf_typ_sz) {
	case sizeof(i1): 
	  shft = * (i1 *)shift_p ; 
	  break ;
	case sizeof(i2): 
	  shft = * (i2 *)shift_p ; 
	  break ;
	case sizeof(i4): 
	  shft = * (i4 *)shift_p ; 
	  break ;
	case sizeof(i8): 
	  shft = * (i8 *)shift_p ; 
	  break ;
	}
	shift_p += shf_stride[0];

	if (shft < 0 ) {
	  if (shft <  (-(int64_t) src_extent[0]))
	    shft = (-(int64_t) src_extent[0]);

	  ll1 = ((int64_t)src_extent[0]) - abs(shft)  ;
	  ll2 = abs(shft) ;
	  r_offs1  = res_stride[0] * ll2 ;
	  r_offs2  = 0 ;
	  a_offs   = 0 ;

	} else {
	  if (shft > (int64_t)src_extent[0])
	    shft = (int64_t)src_extent[0] ;
	  ll1 = ((int64_t)src_extent[0]) -  shft ;
	  ll2 = shft ;
	  r_offs1  = 0 ;
	  r_offs2  = res_stride[0] * ll1 ;
	  a_offs   = a_stride * ll2 ;
	}
	if (shf_rank == 0 )
	  computed_shift = TRUE;
      }
      ap1 = array_p + a_offs ;
      rp1 = result_p + r_offs1;

      for ( k = 0 ; k < ll1 ; k ++ )  {
#ifdef KEY /* Bug 4039 */
	*(ui16 *)rp1 = *(ui16 *)ap1 ;
#else /* KEY Bug 4039 */
	*(r16 *)rp1 = *(r16 *)ap1 ;
#endif /* KEY Bug 4039 */
	rp1 += r_stride ;
	ap1 += a_stride ;
      }

      rp1 = result_p + r_offs2 ;
#ifdef KEY /* Bug 4039 */
      lfill = *(ui16 *) boundary_p ;
#else /* KEY Bug 4039 */
      lfill = *(r16 *) boundary_p ;
#endif /* KEY Bug 4039 */

      for ( k = 0 ; k < ll2 ; k ++ ) {
#ifdef KEY /* Bug 4039 */
	*(ui16 *)rp1 = lfill ;
#else /* KEY Bug 4039 */
	*(r16 *)rp1 = lfill ;
#endif /* KEY Bug 4039 */
	rp1 += r_stride ;
      }

      array_p += a_bump ;
      result_p += r_bump ;
      boundary_p += f_stride ;

      counter[0] = a_size  ;
      j = 0 ;
      while ((counter[j] == src_extent[j]) && (j < src_rank)) {
	array_p += src_offset[j] ;
	result_p += res_offset[j] ;
	shift_p  += shf_offset[j] ;
	boundary_p   += fill_offset[j] ;
	counter[j+1]++ ;
	counter[j] = 0 ;
	j ++ ;
      }

    }
  } else {
    while (counter[src_rank] < src_extent[src_rank] ) {
      char * lfill ;

      if (!computed_shift) {
	switch (shf_typ_sz) {
	case sizeof(i1): 
	  shft = * (i1 *)shift_p ; 
	  break ;
	case sizeof(i2): 
	  shft = * (i2 *)shift_p ; 
	  break ;
	case sizeof(i4): 
	  shft = * (i4 *)shift_p ; 
	  break ;
	case sizeof(i8): 
	  shft = * (i8 *)shift_p ; 
	  break ;
	}
	shift_p += shf_stride[0];

	if (shft < 0 ) {
	  if (shft <  (-(int64_t) src_extent[0]))
	    shft = (-(int64_t) src_extent[0]);

	  ll1 = ((int64_t)src_extent[0]) - abs(shft)  ;
	  ll2 = abs(shft) ;
	  r_offs1  = res_stride[0] * ll2 ;
	  r_offs2  = 0 ;
	  a_offs   = 0 ;

	} else {
	  if (shft > (int64_t)src_extent[0])
	    shft = (int64_t)src_extent[0] ;
	  ll1 = ((int64_t)src_extent[0]) -  shft ;
	  ll2 = shft ;
	  r_offs1  = 0 ;
	  r_offs2  = res_stride[0] * ll1 ;
	  a_offs   = a_stride * ll2 ;
	}
	if (shf_rank == 0 )
	  computed_shift = TRUE;
      }
      ap1 = array_p + a_offs ;
      rp1 = result_p + r_offs1;

      for ( k = 0 ; k < ll1 ; k ++ )  {
	ap = ap1 ;
	rp = rp1 ;
	if (typ_sz > BIGDEFAULTSZ)
	  (void) memcpy (rp, ap, typ_sz);
	else
	  for (j = 0 ; j < typ_sz ; j ++)  *rp++ = *ap ++ ;
	rp1 += r_stride ;
	ap1 += a_stride ;
      }

      rp1 = result_p + r_offs2 ;
      lfill = boundary_p ;

      if (boundary != NULL) {
	for ( k = 0 ; k < ll2 ; k ++ ) {
	  ap2 = lfill ;
	  rp = rp1 ;
	  if (typ_sz > BIGDEFAULTSZ)
	    (void) memcpy (rp, ap2, typ_sz);
	  else
	    for (j = 0 ; j < typ_sz ; j ++)  *rp++ = *ap2 ++ ;
	  rp1 += r_stride ;
	}
      } else {
	for ( k = 0 ; k < ll2 ; k ++ ) {
	  rp = rp1 ;
	  for (j = 0 ; j < typ_sz ; j ++)  *rp++ = *lfill ;
	  rp1 += r_stride ;
	}
      }

      array_p += a_bump ;
      result_p += r_bump ;
      boundary_p += f_stride ;

      counter[0] = a_size  ;
      j = 0 ;
      while ((counter[j] == src_extent[j]) && (j < src_rank)) {
	array_p += src_offset[j] ;
	result_p += res_offset[j] ;
	shift_p  += shf_offset[j] ;
	boundary_p   += fill_offset[j] ;
	counter[j+1]++ ;
	counter[j] = 0 ;
	j ++ ;
      }

    }
  }
}