Esempio n. 1
0
/* Calculates  the  length  of  the string s, not including the terminating
 * \0 character.
 */
size_t strlen(const char *s)
{
  size_t len;
  unsigned int cnt, cmp, skip, mask;
  vec_uchar16 *ptr, data;

  /* Compensate for initial mis-aligned string.
   */
  ptr = (vec_uchar16 *)s;
  skip = (unsigned int)(ptr) & 15;
  mask = 0xFFFF >> skip;

  data = *ptr++;
  cmp = spu_extract(spu_gather(spu_cmpeq(data, 0)), 0);
  cmp &= mask;

  cnt = spu_extract(spu_cntlz(spu_promote(cmp, 0)), 0);
  len = cnt - (skip + 16);

  while (cnt == 32) {
    data = *ptr++;
    len -= 16;
    cnt = spu_extract(spu_cntlz(spu_gather(spu_cmpeq(data, 0))), 0);
    len += cnt;
  }

  return (len);
}
Esempio n. 2
0
unsigned int
__mfc_tag_release (unsigned int tag)
{
  vector unsigned int is_invalid;
  vector unsigned int mask = (vector unsigned int)
	{ 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
  vector signed int zero = (vector signed int) { 0, 0, 0, 0 };

  vector signed int has_been_reserved;

  /* Check if the tag is out of range.  */
  is_invalid = spu_cmpgt (spu_promote (tag, 0), 31);

  /* Check whether the tag has been reserved, set to all 1 if has not
     been reserved, 0 otherwise.  */
  has_been_reserved = (vector signed int) spu_rl (__mfc_tag_table, tag);
  has_been_reserved = (vector signed int) spu_cmpgt (zero, has_been_reserved);

  /* Set invalid.  */
  is_invalid = spu_or ((vector unsigned int) has_been_reserved, is_invalid);

  mask = spu_rlmask (mask, (int)(-tag));
  __mfc_tag_table = spu_or (__mfc_tag_table, mask);

  return spu_extract(is_invalid, 0);
}
Esempio n. 3
0
/* Per element, reports ceil(log2(d)) where d is the absolute distance
 * between the integer images of ref and vals as produced by
 * make_ulonglong() (defined elsewhere; presumably the raw bit pattern of
 * the double -- confirm).
 *
 * ref:  reference values
 * vals: values to compare against ref
 *
 * Returns a vector holding the result for element 0 and element 1. An
 * element whose two images are identical yields 0.
 */
vec_ullong2 bitDiff_d2(vec_double2 ref, vec_double2 vals) {
   double ref0, ref1, vals0, vals1;
   long long refi0, refi1, valsi0, valsi1;
   unsigned long long diff0, diff1, bits0, bits1;
   vec_ullong2 bits;

   ref0 = spu_extract(ref,0);
   ref1 = spu_extract(ref,1);
   vals0 = spu_extract(vals,0);
   vals1 = spu_extract(vals,1);

   refi0 = make_ulonglong(ref0);
   refi1 = make_ulonglong(ref1);
   valsi0 = make_ulonglong(vals0);
   valsi1 = make_ulonglong(vals1);

   /* Absolute distance. The operands are ordered with a signed compare
    * but subtracted as unsigned values, so the result cannot overflow
    * even when the two images have opposite signs.
    */
   if ( refi0 < valsi0 )
   {
      diff0 = (unsigned long long)valsi0 - (unsigned long long)refi0;
   }
   else
   {
      diff0 = (unsigned long long)refi0 - (unsigned long long)valsi0;
   }

   if ( refi1 < valsi1 )
   {
      diff1 = (unsigned long long)valsi1 - (unsigned long long)refi1;
   }
   else
   {
      diff1 = (unsigned long long)refi1 - (unsigned long long)valsi1;
   }

   /* log2(0) is -infinity, which cannot be converted to an unsigned
    * integer; identical inputs are reported as 0 differing bits.
    */
   bits0 = ( diff0 == 0 ) ? 0 : (unsigned long long)ceil(log2((double)diff0));
   bits1 = ( diff1 == 0 ) ? 0 : (unsigned long long)ceil(log2((double)diff1));

   bits = spu_promote( bits0, 0 );
   bits = spu_insert( bits1, bits, 1 );

   return bits;
}
Esempio n. 4
0
/* Per element, reports the absolute distance between the integer images
 * of ref and vals as produced by make_ulonglong() (defined elsewhere;
 * presumably the raw bit pattern of the double -- confirm).
 *
 * ref:  reference values
 * vals: values to compare against ref
 *
 * Returns a vector holding the distance for element 0 and element 1.
 */
vec_ullong2 ulpDiff_d2(vec_double2 ref, vec_double2 vals) {
   double ref0, ref1, vals0, vals1;
   long long refi0, refi1, valsi0, valsi1;
   unsigned long long diff0, diff1;
   vec_ullong2 ulps;

   ref0 = spu_extract(ref,0);
   ref1 = spu_extract(ref,1);
   vals0 = spu_extract(vals,0);
   vals1 = spu_extract(vals,1);

   refi0 = make_ulonglong(ref0);
   refi1 = make_ulonglong(ref1);
   valsi0 = make_ulonglong(vals0);
   valsi1 = make_ulonglong(vals1);

   /* The operands are ordered with a signed compare but subtracted as
    * unsigned values: a signed subtraction (or the negation of its
    * result) can overflow when the two images have opposite signs.
    */
   if ( refi0 < valsi0 )
   {
      diff0 = (unsigned long long)valsi0 - (unsigned long long)refi0;
   }
   else
   {
      diff0 = (unsigned long long)refi0 - (unsigned long long)valsi0;
   }

   if ( refi1 < valsi1 )
   {
      diff1 = (unsigned long long)valsi1 - (unsigned long long)refi1;
   }
   else
   {
      diff1 = (unsigned long long)refi1 - (unsigned long long)valsi1;
   }

   ulps = spu_promote( diff0, 0 );
   ulps = spu_insert( diff1, ulps, 1 );

   return ulps;
}
Esempio n. 5
0
// Flushes the pending triangle data, which ends at endTriangle, and publishes
// the new chain end in the cache line. Does nothing when endTriangle equals
// _currentTriangle.
//
// Steps: store the link to the next triangle in the current one, DMA the
// triangle buffer out, DMA the 16-bit link value to the cache line, clear
// _currentTriangle and wait for tag group 0 to complete.
void writeTriangleBuffer(Triangle* endTriangle)
{
	if (endTriangle == _currentTriangle) {
		return;
	}

	char* end_bytes = (char*)endTriangle;

	// size of the transfer, rounded up to a multiple of 128 bytes
	int dma_bytes = ( end_bytes - _currentTriangleBuffer + 127) & ~127;

	// position of the end, expressed relative to _currentTriangleOffset
	unsigned short end_offset = (end_bytes - ((char*)_currentTriangle)) + _currentTriangleOffset;
	vec_ushort8 end_vec = spu_promote(end_offset, 1);

	// calculate genuine next pointer ( rewind==0 -> next, rewind!=0 -> 0 )
	unsigned short link = spu_extract( spu_andc( end_vec, _currentTriangleRewind ), 1 );
	_currentTriangle->next_triangle = link;

	// DMA the triangle data out
	spu_mfcdma64(_currentTriangleBuffer, mfc_ea2h(_currentTriangleBufferEA), mfc_ea2l(_currentTriangleBufferEA), dma_bytes, 0, MFC_PUT_CMD);

	// update the information in the cache line; _currentTriangleRewind is
	// re-used as the source buffer as its old value is not needed anymore
	_currentTriangleRewind = spu_splats(link);

	// start at the byte offset inside the quadword that equals the low four
	// bits of the target address (presumably so that the local store and
	// effective address alignments agree -- confirm against the MFC rules)
	char* link_src = ((char*)&_currentTriangleRewind) + (_currentTriangleCacheEndTriangleEAL & 15);
	spu_mfcdma64(link_src, _currentTriangleCacheEndTriangleEAH, _currentTriangleCacheEndTriangleEAL, sizeof(short), 0, MFC_PUTB_CMD);

	// finally invalidate the triangle info
	_currentTriangle = NULL;

	// and make sure the DMA completed (both transfers use tag 0)
	mfc_write_tag_mask(1<<0);
	mfc_read_tag_status_all();
}
Esempio n. 6
0
/* Repeatedly merges a chunk of the cache line with the chunk that follows it
 * in the chunkNext chain, until no mergeable pair remains.
 *
 * All 16 chunk slots are tested in parallel, one byte lane per slot. A merge
 * unlinks the successor (the first chunk takes over the successor's next
 * link) and marks the successor's slot with CHUNKNEXT_FREE_BLOCK. The
 * updated chain is written back to cache->chunkNext after every merge.
 *
 * NOTE(review): SHUF0, SHUF1, MERGE and the CHUNKNEXT_* constants are defined
 * outside this block; the lane layouts described below are inferred from how
 * they are used here and should be confirmed against their definitions.
 */
inline void merge_cache_blocks(RenderableCacheLine* cache)
{
    vec_uchar16 next = cache->chunkNext;

    for (;;) {
        // nextnext[i] = next[next[i]]: the successor's own next entry (using
        // next as the shuffle pattern; pattern bytes with high bits set follow
        // the shuffle's special-value rules)
        vec_uchar16 nextnext = spu_shuffle(next, next, next);
        // successor index with everything outside CHUNKNEXT_MASK removed
        vec_uchar16 nextmask = spu_and(next, spu_splats((unsigned char)CHUNKNEXT_MASK));

        // per halfword: all ones where the chunk's start value is 0
        vec_ushort8 firstblock0 = spu_cmpeq( cache->chunkStart[0], 0);
        vec_ushort8 firstblock1 = spu_cmpeq( cache->chunkStart[1], 0);
        // change next to word offset, note we don't care what the low bit shifted in is
        vec_uchar16 firstshuf = (vec_uchar16) spu_sl( (vec_ushort8)nextmask, 1 );
        // first[i]: non-zero when the successor of chunk i has start value 0
        vec_uchar16 first = (vec_uchar16) spu_shuffle( firstblock0, firstblock1, firstshuf );

        vec_ushort8 tri0 = cache->chunkTriangle[0];
        vec_ushort8 tri1 = cache->chunkTriangle[1];
        // byte indices of the odd (| 1) and even (& 254) byte of the
        // successor's halfword in the tri0:tri1 pair
        vec_uchar16 trishufhi = spu_or ( firstshuf, spu_splats((unsigned char) 1));
        vec_uchar16 trishuflo = spu_and( firstshuf, spu_splats((unsigned char) 254));

        // ntri: the successor's triangle value for each chunk; SHUF0/SHUF1
        // presumably interleave the two index vectors into halfword patterns
        // for chunks 0-7 and 8-15
        vec_ushort8 ntri0 = spu_shuffle( tri0, tri1, spu_shuffle( trishuflo, trishufhi, SHUF0 ) );
        vec_ushort8 ntri1 = spu_shuffle( tri0, tri1, spu_shuffle( trishuflo, trishufhi, SHUF1 ) );

        // all ones where a chunk and its successor hold the same triangle value
        vec_ushort8 trieq0 = spu_cmpeq( tri0, ntri0 );
        vec_ushort8 trieq1 = spu_cmpeq( tri1, ntri1 );

        // MERGE presumably packs the 16 halfword results into one byte each
        vec_uchar16 trieq = (vec_uchar16) spu_shuffle( trieq0, trieq1, MERGE );
        // combi = first | ~trieq: any set bit vetoes the merge for that chunk
        vec_uchar16 combi = spu_orc(first, trieq);

        // ~(next | nextnext | combi) compared (unsigned, per byte) against a
        // threshold derived from CHUNKNEXT_BUSY_BIT; the exact bits required
        // depend on that constant's value
        vec_uchar16 canmerge = spu_cmpgt( spu_nor(spu_or(next, nextnext), combi), 256-CHUNKNEXT_BUSY_BIT );

        // one bit per chunk, chunk 0 in bit 15 of the low halfword of element 0
        vec_uint4 gather = spu_gather( canmerge );

        // index of the first mergeable chunk: the leading-zero count is biased
        // by the 16 unused high bits of the word (only element 0 is meaningful)
        vec_uint4 mergeid = spu_sub( spu_cntlz( gather ), spu_promote((unsigned int)16, 0));

        // no chunk can be merged: done
        if( !spu_extract(gather, 0) ) {
            return;
        }

        //	unsigned int firstchunk = spu_extract(mergeid, 0);
        //	unsigned int nextchunk = cache->chunkNextArray[firstchunk];
        // rotating left by (index + 13) bytes brings byte [index] to byte 3,
        // the low byte of element 0
        vec_uint4 v_chunkNext = (vec_uint4) si_rotqby( (qword) next, (qword) spu_add(mergeid,13) );
        vec_uint4 v_chunkNextNext = (vec_uint4) si_rotqby( (qword) next, (qword) spu_add(v_chunkNext,13) );

        // cache->chunkNextArray[firstchunk] = cache->chunkNextArray[nextchunk];
        // (si_cbd builds the byte-insertion shuffle pattern for that slot)
        next = spu_shuffle( (vec_uchar16) v_chunkNextNext, next, (vec_uchar16) si_cbd( (qword) mergeid, 0 ) );

        // cache->chunkNextArray[nextchunk] = CHUNKNEXT_FREE_BLOCK;
        next = spu_shuffle( spu_splats( (unsigned char) CHUNKNEXT_FREE_BLOCK), next, (vec_uchar16) si_cbd( (qword) v_chunkNext, 0 ) );

        // this is for debug use only, it's not really needed...
        // cache->chunkStartArray[nextchunk] = -1;
        cache->chunkStartArray[ spu_extract(v_chunkNext,0) & 255 ] = -1;

        // publish the updated chain before looking for the next pair
        cache->chunkNext = next;
    }
}
Esempio n. 7
0
unsigned int
__mfc_multi_tag_release (unsigned int first_tag, unsigned int number_of_tags)
{
  vector unsigned int table_copy, tmp, tmp1;
  vector unsigned int one = (vector unsigned int)
        { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF };
  vector unsigned int is_invalid;
  unsigned int last_tag;
  vector unsigned int has_been_reserved;

  last_tag = first_tag + number_of_tags;

  table_copy = spu_sl (one, number_of_tags);
  table_copy = spu_rl (table_copy, -last_tag);
  table_copy = spu_xor (table_copy, -1);

  /* Make sure the tags are in range and valid.  */
  tmp = spu_cmpgt (spu_promote(last_tag, 0), 32);
  tmp1 = spu_cmpgt (spu_promote(number_of_tags, 0), 32);
  is_invalid =  spu_cmpgt (spu_promote(first_tag, 0), 31);

  /* All bits are set to 1 if invalid, 0 if valid.  */
  is_invalid = spu_or (tmp, is_invalid);
  is_invalid = spu_or (tmp1, is_invalid);

  /* check whether these tags have been reserved */
  tmp = spu_rlmask (one, (int)-number_of_tags);
  tmp1 = spu_sl (__mfc_tag_table, first_tag);
  has_been_reserved = spu_cmpgt(tmp1, tmp);

  is_invalid = spu_or (has_been_reserved, is_invalid);

  table_copy = spu_sel (__mfc_tag_table, table_copy, table_copy);
  __mfc_tag_table = spu_sel (table_copy, __mfc_tag_table, is_invalid);

  return spu_extract (is_invalid, 0);
}
Esempio n. 8
0
unsigned long long ulpDiff_d(double ref, double val) {
  return spu_extract(ulpDiff_d2(spu_promote(ref,0), spu_promote(val,0)), 0);
}
Esempio n. 9
0
unsigned int ulpDiff_f(float ref, float val) {
  return spu_extract(ulpDiff_f4(spu_promote(ref,0), spu_promote(val,0)), 0);
}
Esempio n. 10
0
/* Tests if a ray intersects a given sphere.
 *
 * Solves the quadratic w*w + B*w + C = 0 for the distance w along the ray,
 * which relies on ray->direction being normalized (A == 1).
 *
 * ray:      ray to test (origin and normalized direction)
 * sphere:   sphere to test against (center and radius)
 * pt:       on a hit, receives origin + w * direction
 * distance: on a hit, receives w
 *
 * Returns 1 if the ray hits the sphere at a distance w > 0, where w is the
 * least positive root. Returns 0, leaving pt and distance untouched, if the
 * line misses the sphere or every intersection lies at or behind the origin.
 */
int ray_intersect_sphere(const ray_t *ray, const sphere_t* sphere,
							point_t *pt, float *distance)
{
	vector4_t	tmp;	/* used to hold scale of ray direction */
	float A = 1;	/* since we know ray direction is normalized */
	float dx = ray->origin.x - sphere->center.x;
	float dy = ray->origin.y - sphere->center.y;
	float dz = ray->origin.z - sphere->center.z;
	float B = 2 * (
		ray->direction.x * (dx) +
		ray->direction.y * (dy) +
		ray->direction.z * (dz));
	float C = (dx * dx + dy * dy + dz * dz) 
		- (sphere->radius * sphere->radius);
	float det = (B*B) - (4 * A * C);
	float wOne;	/* distance to first intersection */
	float wTwo;	/* distance to second intersection */
	float w;	/* least positive w */
#ifdef __SPU__
	float root;	/* square root of det */
#else
	double root;	/* square root of det */
#endif

	if(det < 0.0f)
	{	/* no real roots: the line through the ray misses the sphere */
		return 0;
	}

#ifdef __SPU__
	root = spu_extract(_sqrtf4(spu_promote(det, 0)), 0);
#else
	root = sqrt(det);
#endif
	wOne = (-B - root) / (2.0f * A);
	wTwo = (-B + root) / (2.0f * A);

	/* root >= 0, so wOne <= wTwo: wOne is the nearer intersection and is
	 * preferred when it lies in front of the origin. Otherwise the origin
	 * is inside the sphere (only wTwo is in front) or the whole sphere is
	 * behind the ray (no hit). A tangent ray (det == 0) has wOne == wTwo
	 * and follows the same rule. */
	if(wOne > 0.0f)
	{
		w = wOne;
	}
	else if(wTwo > 0.0f)
	{
		w = wTwo;
	}
	else
	{
		return 0;
	}

	/* use w to calculate where the intersection point is */
	vec4_add(pt, &ray->origin, 
		vec4_scale(&tmp, &ray->direction, w));
	*distance = w;		/* pass back distance to intersection */
	return 1;
}
Esempio n. 11
0
/* Scans the string pointed to by s for the character c and
 * returns a pointer to the last occurrence of c. If
 * c is not found, then NULL is returned.
 *
 * The string is processed one 16-byte quadword at a time. For every
 * quadword two 16-bit maps are built with spu_gather (one bit per byte,
 * byte 0 in the most significant of the 16 bits): cmp_c marks bytes equal
 * to c and cmp_0 marks terminating \0 bytes. The most recent quadword
 * containing a match is remembered in res_ptr/res_cmp.
 */
char * strrchr(const char *s, int c)
{
  int nskip;
  vec_uchar16 *ptr, data, vc;
  vec_uint4 cmp_c, cmp_0, cmp;
  vec_uint4 res_ptr, res_cmp;
  vec_uint4 mask, result;
  vec_uint4 one = spu_splats(0xffffU);
  /* Scan memory array a quadword at a time. Skip leading
   * mis-aligned bytes.
   */
  ptr = (vec_uchar16 *)s;

  /* Shifting the 16-bit all-ones map right by the misalignment clears the
   * bits of the bytes that precede s in the first quadword.
   */
  nskip = -((unsigned int)(ptr) & 15);
  mask = spu_rlmask(one, nskip);

  /* c replicated into all 16 byte lanes. */
  vc = spu_splats((unsigned char)(c));

  /* After the first load, ptr is rounded down to a 16-byte boundary so it
   * addresses the quadword following the one just read.
   */
  data = *ptr++;
  ptr = (vec_uchar16 *)((unsigned int)ptr & ~15);

  cmp_c = spu_and(spu_gather(spu_cmpeq(data, vc)), mask);
  cmp_0 = spu_and(spu_gather(spu_cmpeq(data, 0)), mask);

  res_ptr = spu_splats(0U);
  res_cmp = spu_splats(0U);

  /* Loop until a quadword containing the terminator has been loaded. */
  while (spu_extract(cmp_0, 0) == 0) {
    /* cmp is all ones when the current quadword holds no match; in that
     * case the previous res_ptr/res_cmp are kept. Note that the recorded
     * pointer is ptr, i.e. the address of the quadword AFTER the one the
     * match map belongs to.
     */
    cmp = spu_cmpeq(cmp_c, 0);

    res_ptr = spu_sel(spu_promote((unsigned int)(ptr), 0), res_ptr, cmp);
    res_cmp = spu_sel(cmp_c, res_cmp, cmp);

    data = *ptr++;

    cmp_c = spu_gather(spu_cmpeq(data, vc));
    cmp_0 = spu_gather(spu_cmpeq(data, 0));

    /* This value is recomputed before its next use. */
    cmp = spu_cmpeq(cmp_c, 0);
  }

  /* Compute the location of the last character before termination
   * character.
   *
   * First mask off compare results following the first termination character.
   * The terminator's own bit is kept, so searching for \0 finds it.
   */
  mask = spu_sl(one, 31 - spu_extract(spu_cntlz(cmp_0), 0));
  cmp_c = spu_and(cmp_c, mask);

  /* Conditionally update res_ptr and res_cmp if a match was found in the last
   * quadword.
   */
  cmp = spu_cmpeq(cmp_c, 0);

  res_ptr = spu_sel(spu_promote((unsigned int)(ptr), 0), res_ptr, cmp);
  res_cmp = spu_sel(cmp_c, res_cmp, cmp);

  /* Bit reverse res_cmp for locating last occurrence: the map is expanded
   * to one byte per bit, the bytes are reversed and gathered again, so a
   * leading-zero count then finds the LAST matching byte. mask remembers
   * whether any match was recorded at all (all ones if none).
   */
  mask = spu_cmpeq(res_cmp, 0);

  res_cmp = (vec_uint4)spu_maskb(spu_extract(res_cmp, 0));
  res_cmp = spu_gather((vec_uchar16)spu_shuffle(res_cmp, res_cmp,
						VEC_LITERAL(vec_uchar16,
							    15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0)));

  /* Compute the location (ptr) of the last occurrence of c. If no
   * occurrence was found (ie, element 0 of res_cmp == 0, then return
   * NULL.
   *
   * res_ptr is 16 bytes past the start of the matching quadword and the
   * leading-zero count is 31 minus the byte index of the last match, so
   * res_ptr + 15 - cntlz is the address of that byte.
   */
  result = spu_sub(spu_add(res_ptr, 15), spu_cntlz(res_cmp));
  result = spu_andc(result, mask);

  return ((char *)spu_extract(result, 0));
}