/* Calculates the length of the string s, not including the terminating * \0 character. */ size_t strlen(const char *s) { size_t len; unsigned int cnt, cmp, skip, mask; vec_uchar16 *ptr, data; /* Compensate for initial mis-aligned string. */ ptr = (vec_uchar16 *)s; skip = (unsigned int)(ptr) & 15; mask = 0xFFFF >> skip; data = *ptr++; cmp = spu_extract(spu_gather(spu_cmpeq(data, 0)), 0); cmp &= mask; cnt = spu_extract(spu_cntlz(spu_promote(cmp, 0)), 0); len = cnt - (skip + 16); while (cnt == 32) { data = *ptr++; len -= 16; cnt = spu_extract(spu_cntlz(spu_gather(spu_cmpeq(data, 0))), 0); len += cnt; } return (len); }
unsigned int __mfc_tag_release (unsigned int tag) { vector unsigned int is_invalid; vector unsigned int mask = (vector unsigned int) { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; vector signed int zero = (vector signed int) { 0, 0, 0, 0 }; vector signed int has_been_reserved; /* Check if the tag is out of range. */ is_invalid = spu_cmpgt (spu_promote (tag, 0), 31); /* Check whether the tag has been reserved, set to all 1 if has not been reserved, 0 otherwise. */ has_been_reserved = (vector signed int) spu_rl (__mfc_tag_table, tag); has_been_reserved = (vector signed int) spu_cmpgt (zero, has_been_reserved); /* Set invalid. */ is_invalid = spu_or ((vector unsigned int) has_been_reserved, is_invalid); mask = spu_rlmask (mask, (int)(-tag)); __mfc_tag_table = spu_or (__mfc_tag_table, mask); return spu_extract(is_invalid, 0); }
/*
 * For each of the two double elements, returns ceil(log2(d)) where d is the
 * absolute difference between the integer values that make_ulonglong()
 * yields for the reference and the tested value (presumably their raw bit
 * patterns -- confirm against make_ulonglong's definition).
 *
 * ref  - reference values
 * vals - values under test
 *
 * An element whose two integer values are equal yields 0.  The original
 * code evaluated log2(0) = -infinity there and converted it to an unsigned
 * integer, which is undefined behaviour in C.
 */
vec_ullong2 bitDiff_d2(vec_double2 ref, vec_double2 vals)
{
   double ref0, ref1, vals0, vals1;
   long long refi0, refi1, valsi0, valsi1, diff0, diff1;
   unsigned long long bits0, bits1;
   vec_ullong2 bits;

   ref0 = spu_extract(ref,0);
   ref1 = spu_extract(ref,1);
   vals0 = spu_extract(vals,0);
   vals1 = spu_extract(vals,1);

   refi0 = make_ulonglong(ref0);
   refi1 = make_ulonglong(ref1);
   valsi0 = make_ulonglong(vals0);
   valsi1 = make_ulonglong(vals1);

   /* Absolute difference of the integer representations. */
   diff0 = refi0 - valsi0;
   diff1 = refi1 - valsi1;
   if ( diff0 < 0 )
   {
      diff0 = valsi0 - refi0;
   }
   if ( diff1 < 0 )
   {
      diff1 = valsi1 - refi1;
   }

   /* log2() is only taken of strictly positive differences; anything else
    * is reported as 0 differing bits instead of converting -inf/NaN.
    */
   bits0 = (diff0 > 0) ? (unsigned long long)ceil(log2((double)diff0)) : 0ULL;
   bits1 = (diff1 > 0) ? (unsigned long long)ceil(log2((double)diff1)) : 0ULL;

   bits = spu_promote( bits0, 0 );
   bits = spu_insert( bits1, bits, 1 );

   return bits;
}
/*
 * For each of the two double elements, returns the absolute difference
 * between the integer values that make_ulonglong() yields for the reference
 * and for the tested value.
 *
 * ref  - reference values
 * vals - values under test
 */
vec_ullong2 ulpDiff_d2(vec_double2 ref, vec_double2 vals)
{
   double expect_lo, expect_hi, actual_lo, actual_hi;
   long long e_lo, e_hi, a_lo, a_hi;
   long long delta_lo, delta_hi;
   vec_ullong2 out;

   /* Pull both lanes out of each vector. */
   expect_lo = spu_extract(ref,0);
   expect_hi = spu_extract(ref,1);
   actual_lo = spu_extract(vals,0);
   actual_hi = spu_extract(vals,1);

   /* Integer view of every lane. */
   e_lo = make_ulonglong(expect_lo);
   e_hi = make_ulonglong(expect_hi);
   a_lo = make_ulonglong(actual_lo);
   a_hi = make_ulonglong(actual_hi);

   /* Magnitude of the signed difference, lane by lane. */
   delta_lo = ((e_lo - a_lo) < 0) ? (a_lo - e_lo) : (e_lo - a_lo);
   delta_hi = ((e_hi - a_hi) < 0) ? (a_hi - e_hi) : (e_hi - a_hi);

   out = spu_promote( (unsigned long long)delta_lo, 0 );
   out = spu_insert( (unsigned long long)delta_hi, out, 1 );

   return out;
}
// Writes the pending triangle data out to main memory and publishes the new
// end-of-triangles offset.
//
// endTriangle points one past the last triangle written into the local
// triangle buffer.  Nothing happens when it equals _currentTriangle (no new
// data).  Otherwise the function:
//   1. stores the "next" offset into the current triangle header,
//   2. DMAs the buffer (length rounded up to a multiple of 128 bytes) to
//      _currentTriangleBufferEA,
//   3. DMAs the 16-bit next offset to the effective address held in
//      _currentTriangleCacheEndTriangleEAH/EAL using a barriered put,
//   4. clears _currentTriangle and waits for tag 0 to complete.
//
// NOTE: the statements below were previously collapsed onto a single line,
// so the first "//" comment swallowed the rest of the function.  Only the
// line structure has been restored; statement order is unchanged.
void writeTriangleBuffer(Triangle* endTriangle)
{
	if (endTriangle != _currentTriangle) {
		// bytes from the start of the buffer to endTriangle, rounded up to 128
		int length = ( ((char*)endTriangle) - _currentTriangleBuffer + 127) & ~127;
		unsigned short endTriangleBase = (((char*)endTriangle) - ((char*)_currentTriangle)) + _currentTriangleOffset;
		vec_ushort8 v_new_end = spu_promote(endTriangleBase, 1);

		// calculate genuine next pointer ( rewind==0 -> next, rewind!=0 -> 0 )
		unsigned short next_pointer = spu_extract( spu_andc( v_new_end, _currentTriangleRewind ), 1 );
		_currentTriangle->next_triangle = next_pointer;

		// printf("current=0x%x, endTriBase=0x%x, next_pointer=0x%x\n", _currentTriangleOffset, endTriangleBase, next_pointer);

		// DMA the triangle data out
		spu_mfcdma64(_currentTriangleBuffer, mfc_ea2h(_currentTriangleBufferEA), mfc_ea2l(_currentTriangleBufferEA), length, 0, MFC_PUT_CMD);

		// update the information in the cache line
		_currentTriangleRewind = spu_splats(next_pointer); // re-use this variable as we don't need it anymore

		// source address inside the splatted vector shares its low 4 address
		// bits with the destination effective address
		char* dstart = ((char*)&_currentTriangleRewind) + (_currentTriangleCacheEndTriangleEAL & 15);
		spu_mfcdma64(dstart, _currentTriangleCacheEndTriangleEAH, _currentTriangleCacheEndTriangleEAL, sizeof(short), 0, MFC_PUTB_CMD);

		// printf("writing from %x to %x:%x\n", dstart, _currentTriangleCacheEndTriangleEAH, _currentTriangleCacheEndTriangleEAL);

		// finally invalidate the triangle info
		_currentTriangle = NULL;

		// and make sure the DMA completed
		mfc_write_tag_mask(1<<0);
		mfc_read_tag_status_all();
	}
}
// Repeatedly merges adjacent chunks of a renderable cache line.
//
// Each pass looks, across all 16 entries of cache->chunkNext at once, for a
// chunk whose successor can be merged into it.  Judging by the expressions
// below, that requires the two chunks to carry the same chunkTriangle value,
// the successor not to be a first block (chunkStart == 0), and neither to
// have the bits tested against CHUNKNEXT_BUSY_BIT set -- TODO confirm
// against the CHUNKNEXT_* definitions.  When a candidate is found the
// successor is unlinked from the list and marked CHUNKNEXT_FREE_BLOCK; when
// none is found the function returns.
//
// cache->chunkNext is written back after every successful merge.
//
// NOTE: this function was previously collapsed onto two physical lines, so
// the first "//" comment swallowed the rest of the body.  Only the line
// structure has been restored; the code itself is unchanged.
inline void merge_cache_blocks(RenderableCacheLine* cache)
{
	vec_uchar16 next = cache->chunkNext;

	for (;;) {
		// successor of each chunk's successor, and the successor index with flag bits masked off
		vec_uchar16 nextnext = spu_shuffle(next, next, next);
		vec_uchar16 nextmask = spu_and(next, spu_splats((unsigned char)CHUNKNEXT_MASK));

		vec_ushort8 firstblock0 = spu_cmpeq( cache->chunkStart[0], 0);
		vec_ushort8 firstblock1 = spu_cmpeq( cache->chunkStart[1], 0);
		// change next to word offset, note we don't care what the low bit shifted in is
		vec_uchar16 firstshuf = (vec_uchar16) spu_sl( (vec_ushort8)nextmask, 1 );
		vec_uchar16 first = (vec_uchar16) spu_shuffle( firstblock0, firstblock1, firstshuf );

		// fetch the triangle value of every chunk's successor
		vec_ushort8 tri0 = cache->chunkTriangle[0];
		vec_ushort8 tri1 = cache->chunkTriangle[1];
		vec_uchar16 trishufhi = spu_or ( firstshuf, spu_splats((unsigned char) 1));
		vec_uchar16 trishuflo = spu_and( firstshuf, spu_splats((unsigned char) 254));

		vec_ushort8 ntri0 = spu_shuffle( tri0, tri1, spu_shuffle( trishuflo, trishufhi, SHUF0 ) );
		vec_ushort8 ntri1 = spu_shuffle( tri0, tri1, spu_shuffle( trishuflo, trishufhi, SHUF1 ) );

		// per-chunk: does the successor have the same triangle value?
		vec_ushort8 trieq0 = spu_cmpeq( tri0, ntri0 );
		vec_ushort8 trieq1 = spu_cmpeq( tri1, ntri1 );
		vec_uchar16 trieq = (vec_uchar16) spu_shuffle( trieq0, trieq1, MERGE );
		vec_uchar16 combi = spu_orc(first, trieq);

		vec_uchar16 canmerge = spu_cmpgt( spu_nor(spu_or(next, nextnext), combi), 256-CHUNKNEXT_BUSY_BIT );

		vec_uint4 gather = spu_gather( canmerge );
		vec_uint4 mergeid = spu_sub( spu_cntlz( gather ), spu_promote((unsigned int)16, 0));

		// no mergeable pair left
		if( !spu_extract(gather, 0) ) {
			return;
		}

		//	unsigned int firstchunk = spu_extract(mergeid, 0);
		//	unsigned int nextchunk = cache->chunkNextArray[firstchunk];
		vec_uint4 v_chunkNext = (vec_uint4) si_rotqby( (qword) next, (qword) spu_add(mergeid,13) );
		vec_uint4 v_chunkNextNext = (vec_uint4) si_rotqby( (qword) next, (qword) spu_add(v_chunkNext,13) );

		// cache->chunkNextArray[firstchunk] = cache->chunkNextArray[nextchunk];
		next = spu_shuffle( (vec_uchar16) v_chunkNextNext, next, (vec_uchar16) si_cbd( (qword) mergeid, 0 ) );

		// cache->chunkNextArray[nextchunk] = CHUNKNEXT_FREE_BLOCK;
		next = spu_shuffle( spu_splats( (unsigned char) CHUNKNEXT_FREE_BLOCK), next, (vec_uchar16) si_cbd( (qword) v_chunkNext, 0 ) );

		// this is for debug use only, it's not really needed...
		// cache->chunkStartArray[nextchunk] = -1;
		cache->chunkStartArray[ spu_extract(v_chunkNext,0) & 255 ] = -1;

		cache->chunkNext = next;
	}
}
unsigned int __mfc_multi_tag_release (unsigned int first_tag, unsigned int number_of_tags) { vector unsigned int table_copy, tmp, tmp1; vector unsigned int one = (vector unsigned int) { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; vector unsigned int is_invalid; unsigned int last_tag; vector unsigned int has_been_reserved; last_tag = first_tag + number_of_tags; table_copy = spu_sl (one, number_of_tags); table_copy = spu_rl (table_copy, -last_tag); table_copy = spu_xor (table_copy, -1); /* Make sure the tags are in range and valid. */ tmp = spu_cmpgt (spu_promote(last_tag, 0), 32); tmp1 = spu_cmpgt (spu_promote(number_of_tags, 0), 32); is_invalid = spu_cmpgt (spu_promote(first_tag, 0), 31); /* All bits are set to 1 if invalid, 0 if valid. */ is_invalid = spu_or (tmp, is_invalid); is_invalid = spu_or (tmp1, is_invalid); /* check whether these tags have been reserved */ tmp = spu_rlmask (one, (int)-number_of_tags); tmp1 = spu_sl (__mfc_tag_table, first_tag); has_been_reserved = spu_cmpgt(tmp1, tmp); is_invalid = spu_or (has_been_reserved, is_invalid); table_copy = spu_sel (__mfc_tag_table, table_copy, table_copy); __mfc_tag_table = spu_sel (table_copy, __mfc_tag_table, is_invalid); return spu_extract (is_invalid, 0); }
unsigned long long ulpDiff_d(double ref, double val) { return spu_extract(ulpDiff_d2(spu_promote(ref,0), spu_promote(val,0)), 0); }
unsigned int ulpDiff_f(float ref, float val) { return spu_extract(ulpDiff_f4(spu_promote(ref,0), spu_promote(val,0)), 0); }
/*
 * Tests whether a ray intersects a given sphere.
 *
 * Solves the quadratic w^2 + B*w + C = 0 for the distance w along the ray,
 * relying on the ray direction being normalized (so the leading
 * coefficient A is 1).
 *
 * ray      - ray to test; its direction is expected to be unit length
 * sphere   - sphere to test against
 * pt       - receives the intersection point on a hit
 * distance - receives the distance along the ray to that point on a hit
 *
 * Returns 1 and fills *pt and *distance with the nearest intersection that
 * lies strictly in front of the ray origin.  Returns 0, leaving *pt and
 * *distance untouched, when the ray's line misses the sphere or when every
 * intersection lies at or behind the origin.
 *
 * Previously `w` started at 0 and the second root was only accepted when
 * `wTwo < w`, which could never hold: an origin inside the sphere, or a
 * sphere entirely behind the ray, was reported as a hit at distance 0, and
 * a tangent point behind the origin was reported as a hit as well.
 */
int ray_intersect_sphere(const ray_t *ray, const sphere_t* sphere, point_t *pt, float *distance)
{
    vector4_t tmp; /* used to hold scale of ray direction */
    float A = 1;   /* since we know ray direction is normalized */
    float dx = ray->origin.x - sphere->center.x;
    float dy = ray->origin.y - sphere->center.y;
    float dz = ray->origin.z - sphere->center.z;
    float B = 2 * ( ray->direction.x * (dx) +
                    ray->direction.y * (dy) +
                    ray->direction.z * (dz));
    float C = (dx * dx + dy * dy + dz * dz)
              - (sphere->radius * sphere->radius);
    float det = (B*B) - (4 * A * C);
    float wOne; /* distance to first (nearer) intersection */
    float wTwo; /* distance to second (farther) intersection */
    float w;    /* least positive w */
#ifdef __SPU__
    float root;
#else
    double root;
#endif

    if(det < 0.0f)
    {
        /* no real roots: the line through the ray misses the sphere */
        return 0;
    }

    /* square root of the discriminant, computed once for both roots */
#ifdef __SPU__
    root = spu_extract( _sqrtf4(spu_promote(det, 0)), 0);
#else
    root = sqrt(det);
#endif

    /* root >= 0 and the divisor is positive, hence wOne <= wTwo;
     * the two are equal when det == 0 (tangent ray) */
    wOne = (-B - root) / (2.0f * A);
    wTwo = (-B + root) / (2.0f * A);

    if(wOne > 0.0f)
    {
        w = wOne;   /* both intersections are in front of the origin */
    }
    else if(wTwo > 0.0f)
    {
        w = wTwo;   /* origin is inside the sphere: only the exit point counts */
    }
    else
    {
        return 0;   /* sphere is entirely behind the ray origin */
    }

    /* now w is least positive root */
    /* use it to calculate where intersection point is */
    vec4_add(pt, &ray->origin, vec4_scale(&tmp, &ray->direction, w));
    *distance = w; /* pass back distance to intersection */
    return 1;
}
/* Scans the string pointed to by s for the character c and
 * returns a pointer to the last occurrence of c. If
 * c is not found, then NULL is returned.
 *
 * The string is processed one 16-byte quadword at a time. For every
 * quadword a 16-bit mask of "byte equals c" and a 16-bit mask of "byte
 * equals 0" are gathered. The most recent non-empty match mask (res_cmp)
 * is remembered together with the value ptr had at that moment (res_ptr),
 * which is the address of the quadword FOLLOWING the one that matched.
 */
char * strrchr(const char *s, int c)
{
  int nskip;
  vec_uchar16 *ptr, data, vc;
  vec_uint4 cmp_c, cmp_0, cmp;
  vec_uint4 res_ptr, res_cmp;
  vec_uint4 mask, result;
  vec_uint4 one = spu_splats(0xffffU);

  /* Scan memory array a quadword at a time. Skip leading
   * mis-aligned bytes.
   */
  ptr = (vec_uchar16 *)s;

  /* mask drops the compare bits of the bytes that precede s within the
   * first quadword.
   */
  nskip = -((unsigned int)(ptr) & 15);
  mask = spu_rlmask(one, nskip);

  /* c replicated into all 16 byte lanes. */
  vc = spu_splats((unsigned char)(c));

  /* Load the first quadword, then round ptr down so that it addresses the
   * next aligned quadword.
   * NOTE(review): this relies on the load through a misaligned ptr
   * fetching the enclosing aligned quadword -- confirm for the target.
   */
  data = *ptr++;
  ptr = (vec_uchar16 *)((unsigned int)ptr & ~15);

  cmp_c = spu_and(spu_gather(spu_cmpeq(data, vc)), mask);
  cmp_0 = spu_and(spu_gather(spu_cmpeq(data, 0)), mask);

  /* No match recorded yet. */
  res_ptr = spu_splats(0U);
  res_cmp = spu_splats(0U);

  /* Loop while the quadword just examined holds no terminator. */
  while (spu_extract(cmp_0, 0) == 0) {
    /* cmp is all ones when the quadword just examined had no match; in
     * that case the previously recorded res_ptr / res_cmp are kept.
     */
    cmp = spu_cmpeq(cmp_c, 0);
    res_ptr = spu_sel(spu_promote((unsigned int)(ptr), 0), res_ptr, cmp);
    res_cmp = spu_sel(cmp_c, res_cmp, cmp);

    data = *ptr++;

    cmp_c = spu_gather(spu_cmpeq(data, vc));
    cmp_0 = spu_gather(spu_cmpeq(data, 0));

    /* This value is recomputed before it is next read. */
    cmp = spu_cmpeq(cmp_c, 0);
  }

  /* Compute the location of the last character before termination
   * character.
   *
   * First mask off compare results following the first termination character.
   * The terminator's own position is kept, so searching for '\0' finds it.
   */
  mask = spu_sl(one, 31 - spu_extract(spu_cntlz(cmp_0), 0));
  cmp_c = spu_and(cmp_c, mask);

  /* Conditionally update res_ptr and res_cmp if a match was found in the last
   * quadword.
   */
  cmp = spu_cmpeq(cmp_c, 0);
  res_ptr = spu_sel(spu_promote((unsigned int)(ptr), 0), res_ptr, cmp);
  res_cmp = spu_sel(cmp_c, res_cmp, cmp);

  /* Bit reverse res_cmp for locating last occurrence. mask is set to all
   * ones when no match was recorded at all.
   */
  mask = spu_cmpeq(res_cmp, 0);

  /* Expand the 16 match bits to 16 bytes, reverse the byte order and gather
   * them back into a 16-bit mask.
   */
  res_cmp = (vec_uint4)spu_maskb(spu_extract(res_cmp, 0));
  res_cmp = spu_gather((vec_uchar16)spu_shuffle(res_cmp, res_cmp, VEC_LITERAL(vec_uchar16, 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0)));

  /* Compute the location (ptr) of the last occurrence of c. If no
   * occurrence was found (i.e., element 0 of res_cmp == 0), then return
   * NULL.
   *
   * With m the byte index of the last match inside its quadword, the
   * leading-zero count of the reversed mask is 31 - m, so the expression
   * below evaluates to res_ptr - 16 + m.
   */
  result = spu_sub(spu_add(res_ptr, 15), spu_cntlz(res_cmp));
  result = spu_andc(result, mask);

  return ((char *)spu_extract(result, 0));
}