/*
 * Return entry n of the prime table, going through a local cache window.
 *
 * The window is described by primeCacheStart / primeCacheSize and backed by
 * primeCacheData (an array of 4-lane vectors). On a miss the window is
 * refilled by DMA from setup.vPrimes, starting at n rounded down to a
 * multiple of 4, and primeCacheStart is moved to that index.
 *
 * NOTE(review): the hit test is strict on the lower bound, so
 * n == primeCacheStart always takes the refill path. Confirm whether that is
 * intentional (e.g. to force the very first fill) before relaxing it.
 */
int cacheGetPrime(int n)
{
    const int cached = (n < primeCacheStart + primeCacheSize) && (n > primeCacheStart);

    if (!cached)
    {
        // Miss: fetch a fresh window.
        uint32_t tag = mfc_tag_reserve();
        uint32_t size = CACHE_PRIME_SIZE * 16;

        // First index of the new window: n rounded down to a multiple of 4.
        const int windowStart = n - n % 4;
        unsigned long long EA = setup.vPrimes + windowStart * 4;

        // Issue the transfer and block until this tag group has completed.
        mfc_get(&primeCacheData, EA, size, tag, 0, 0);
        mfc_write_tag_mask(1 << tag);
        mfc_read_tag_status_all();
        mfc_tag_release(tag);

        primeCacheStart = windowStart;
    }

    // Vector slot is the offset into the window / 4; lane is n % 4.
    return spu_extract(primeCacheData[(n - primeCacheStart) / 4], n % 4);
}
Example #2
0
void check_pull_dma(int side){
  // Check left
  if(md[am].held_tag[side] < 32){
    mfc_write_tag_mask( 1 << md[am].held_tag[side] );
    int status = mfc_read_tag_status_immediate();

    if(status){
      // Update idx
      md[am].idx[side][HEAD] = spu_add(md[am].idx[side][HEAD], md[am].num_waiting[side]);

      vector signed int buffer_size = spu_splats(mcb[am].buffer_size[side] -1);
      vector unsigned int cmp_v = spu_cmpgt(md[am].idx[side][HEAD], buffer_size);
      vector signed int zeros = {0,0,0,0};
      buffer_size = spu_add(buffer_size,1);
      zeros = spu_sel(zeros,buffer_size,cmp_v);
      md[am].idx[side][HEAD] = spu_sub(md[am].idx[side][HEAD],zeros);

      md[am].num_pulled[side] += md[am].num_waiting[side];      
      md[am].num_waiting[side] = 0;
      if(md[am].num_pulled[side] == mcb[am].data_size[side]){
	md[am].mm_depleted[side] = 1;
      }
      // Release tag
      mfc_tag_release( md[am].held_tag[side] );
      md[am].held_tag[side] = 32;      
    }
  }
}
Example #3
0
/*
 * Poll (without blocking) the outbound DMA tracked by md[am].held_tag[OUT].
 * Once it has completed the tag is released, and if both inputs have been
 * fully consumed and the OUT buffer is empty the current merger is marked
 * done and num_active_mergers is decremented.
 */
void check_push_dma(){
  // A held_tag of 32 or more means there is no outbound DMA to check.
  if(md[am].held_tag[OUT] >= 32)
    return;

  mfc_write_tag_mask( 1 << md[am].held_tag[OUT] );
  if(!mfc_read_tag_status_immediate())
    return;   // transfer still pending

  // Release tag
  mfc_tag_release( md[am].held_tag[OUT] );
  md[am].held_tag[OUT] = 32;

  // num_in_buffer() is only consulted once both inputs are exhausted.
  if(md[am].consumed[LEFT] == mcb[am].data_size[LEFT]
     && md[am].consumed[RIGHT] == mcb[am].data_size[RIGHT]
     && num_in_buffer(OUT) == 0){
    md[am].done = 1;
    --num_active_mergers;
  }
}
Example #4
0
/*
 * Terminate the program with the given exit status, first handing the
 * file-scope MFC tag `tag` back via mfc_tag_release(). Does not return.
 */
void as_exit (int status)
{
  /* Give the DMA tag back before the process goes away. */
  mfc_tag_release (tag);

  exit (status);
}
Example #5
0
/*
 * Build kd-tree nodes from the work list in arg.kdbuffer[].
 *
 * The function ping-pongs between arg.kdbuffer[0] and arg.kdbuffer[1]:
 * every entry of the current list is turned into one node with two
 * children; children that are not leaves are appended to the other list,
 * which becomes the current list on the next pass. The loop ends when the
 * current list is empty.
 *
 * While curjob < arg.njobs, the right child of each processed entry is
 * written into that job's buffers instead of the local ones, and the job's
 * semaphore is posted afterwards. Any jobs that were not handed work this
 * way are posted once the main loop has finished.
 *
 * On exit arg.curnode, total_leaf_size, arg.leafbuffer and
 * arg.leaf_aabb_buffer are DMA'd to the addresses held in the matching
 * arg.p* fields.
 */
void MakeNodes()
{
	// Two tags reserved for the write-back transfers at the end.
	uint put_tag[2];

	put_tag[0] = mfc_tag_reserve();
	put_tag[1] = mfc_tag_reserve();

	// Index of the kdbuffer currently being consumed; 1-b is being filled.
	ushort b = 0;

	// Descriptors for the left and right child of the entry being processed.
	kdbuffer_t l_kdb ALIGNED(16);
	kdbuffer_t r_kdb ALIGNED(16);

	// Staging copies of the node being written and the entry being read.
	kdnode_t node ALIGNED(16);
	kdbuffer_t kdb	ALIGNED(16);
	DoubleBufInit(&aabb_db, 0, 0, sizeof(aabb_t), NUM_AABBS, aabbbuffer[0], aabbbuffer[1]);

	// printf("Empty? %i\n", BufferEmpty(&arg.kdbuffer[b]));

	while(! BufferEmpty(&arg.kdbuffer[b]) )
	{
		kdbuffer_t *pkdb = (kdbuffer_t*)arg.kdbuffer[b].buffer;
		int size = BufferNumElements(&arg.kdbuffer[b]);
		int i;
		
		// Reset the buffers that will receive this pass's children.
		BufferClear(&arg.aabb_buffer[1-b]);
		BufferClear(&arg.kdbuffer[1-b]);
	
		// printf("size %i\n", size);

		for(i=0; i < size; i++)
		{
			// Take the next two node indices for the children.
			l_kdb.node = arg.curnode++;
			r_kdb.node = arg.curnode++;		

			// Bring entry i into the local staging copy.
			// NOTE(review): presumably an EA -> local-store copy; confirm against memcpy_ls.
			memcpy_ls(&kdb, &pkdb[i], sizeof(kdbuffer_t));

			node.split = kdb.plane;
			node.axis =  kdb.axis;
			node.left =  l_kdb.node;
			node.right = r_kdb.node;	


			// Store the finished node at its slot in arg.nodes.
			// NOTE(review): presumably a local-store -> EA copy; confirm against memcpy_ea.
			memcpy_ea(&arg.nodes[ kdb.node ], &node, sizeof(kdnode_t));


			// Left child always gets its storage from the local next-pass buffer.
			KDBufferAllocate(&l_kdb, kdb.left_size, &arg.aabb_buffer[1-b]);

			// Right child goes to the next unstarted job's buffer if there is one.
			if(curjob < arg.njobs)
				KDBufferAllocate(&r_kdb, kdb.right_size, &arg.job_aabb_buffer[curjob]);
			else
				KDBufferAllocate(&r_kdb, kdb.right_size, &arg.aabb_buffer[1-b]);

			KDPartitionAll(&kdb, &l_kdb, &r_kdb);

			// Left child: leaf when max depth is reached or it is small enough.
			if(l_kdb.depth == arg.maxdepth || l_kdb.size <= arg.maxleafsize)
			{
				total_leaf_size += l_kdb.count;

				l_kdb.aabb = (aabb_t*)BufferCopyTo(&arg.leaf_aabb_buffer, l_kdb.aabb, l_kdb.count);
				BufferCopyToLS(&arg.leafbuffer, &l_kdb, 1);
			}			
			else
			{	
				// Not a leaf: queue it for the next pass.
				BufferCopyToLS(&arg.kdbuffer[1-b], &l_kdb, 1);
			}

			// Right child: same leaf test, but the destination depends on curjob.
			if(r_kdb.depth == arg.maxdepth || r_kdb.size <= arg.maxleafsize)
			{
				total_leaf_size += r_kdb.count;
		
				if(curjob < arg.njobs)
				{
					r_kdb.aabb = (aabb_t*)BufferCopyTo(&arg.job_leaf_aabb_buffer[curjob], r_kdb.aabb, r_kdb.count);
					BufferCopyToLS(&arg.job_leafbuffer[curjob], &r_kdb, 1);

					// Write the updated job leaf-buffer descriptor out and wait for it.
					spu_mfcdma32(&arg.job_leafbuffer[curjob], (uint)arg.pjob_leafbuffer[curjob], sizeof(buffer_t), jobtag, MFC_PUT_CMD);					
					DmaWait(jobtag);
		
				}
				else
				{
					r_kdb.aabb = (aabb_t*)BufferCopyTo(&arg.leaf_aabb_buffer, r_kdb.aabb, r_kdb.count);
					BufferCopyToLS(&arg.leafbuffer, &r_kdb, 1);

				}

			}
			else
			{
				if(curjob < arg.njobs)
				{
					BufferCopyToLS(&arg.job_kdbuffer[curjob], &r_kdb, 1);

					// Write the updated job work-list descriptor out and wait for it.
					spu_mfcdma32(&arg.job_kdbuffer[curjob], (uint)arg.pjob_kdbuffer[curjob], sizeof(buffer_t), jobtag, MFC_PUT_CMD);					
					DmaWait(jobtag);

				}
				else
					BufferCopyToLS(&arg.kdbuffer[1-b], &r_kdb, 1);
			}


			/*
			Disabled earlier variant of the child handling above, kept for reference.

			if(curjob < njobs)
				KDBufferAllocate(&r_kdb, kdb[i].right_size, &jobs[curjob]->aabb_buffer[0]);
			else
				KDBufferAllocate(&r_kdb, kdb[i].right_size, &aabb_buffer[1-b]);

			KDPartition(&kdb[i], &l_kdb, &r_kdb);

			if(l_kdb.depth == maxdepth || l_kdb.size <= maxleafsize)
			{
				l_kdb.aabb = (aabb_t*)BufferCopyTo(&leaf_aabb_buffer, l_kdb.aabb, l_kdb.count);
				BufferCopyTo(&leafbuffer, &l_kdb, 1);
			}			
			else
				BufferCopyTo(&kdbuffer[1-b], &l_kdb, 1);

			if(r_kdb.depth == maxdepth || r_kdb.size <= maxleafsize)
			{
				if(curjob < njobs)
				{
					r_kdb.aabb = (aabb_t*)BufferCopyTo(&jobs[curjob]->leaf_aabb_buffer, r_kdb.aabb, r_kdb.count);
					BufferCopyTo(&jobs[curjob]->leafbuffer, &r_kdb, 1);
				}
				else
				{
					r_kdb.aabb = (aabb_t*)BufferCopyTo(&leaf_aabb_buffer, r_kdb.aabb, r_kdb.count);
					BufferCopyTo(&leafbuffer, &r_kdb, 1);
				}
			}
			else
			{
				if(curjob < njobs)
					BufferCopyTo(&jobs[curjob]->kdbuffer[0], &r_kdb, 1);
				else
					BufferCopyTo(&kdbuffer[1-b], &r_kdb, 1);
			}
			*/



			if(curjob < arg.njobs)
			{
				// Start other job
				
				ppe_post_sema(arg.sema[curjob]);
				curjob++;
			}


	
		}
	
	
		// Swap roles: the list just filled becomes the one to consume.
		b =  1 - b;

	}

	// Post the semaphores of any jobs that never received a right child.
	while( curjob < arg.njobs)
	{
		ppe_post_sema(arg.sema[curjob]);
		curjob++;
	}



	// Transfer back
	spu_mfcdma32(&arg.curnode, (unsigned int)arg.pcurnode, (unsigned int)sizeof(int), put_tag[0], MFC_PUT_CMD);
	spu_mfcdma32(&total_leaf_size, (unsigned int)arg.ptotal_leaf_size, (unsigned int)sizeof(int), put_tag[1], MFC_PUT_CMD);
	

	DmaWait(put_tag[0]);
	DmaWait(put_tag[1]);


	// Second round on the same tags: the leaf buffer descriptors.
	spu_mfcdma32(&arg.leafbuffer, (unsigned int)arg.pleafbuffer, (unsigned int)sizeof(buffer_t), put_tag[0], MFC_PUT_CMD);
	spu_mfcdma32(&arg.leaf_aabb_buffer, (unsigned int)arg.pleaf_aabb_buffer, (unsigned int)sizeof(buffer_t), put_tag[1], MFC_PUT_CMD);

	DmaWaitAll();

	mfc_tag_release(put_tag[0]);
	mfc_tag_release(put_tag[1]);

}
Example #6
0
/*
 * SPU entry point: accumulates the dot product of this SPU's slice of two
 * float vectors that live in main memory, and reports the sum through the
 * outbound mailbox.
 *
 * spe_id        - not used by this function.
 * ppu_vector_a  - effective address of the first vector.
 * ppu_vector_b  - effective address of the second vector.
 *
 * The slice index is read from the inbound mailbox; the slice starts at
 * spu_num * SUBVEC_SZ_BYTES and is SUBVEC_SZ_CHUNKS chunks of
 * CHUNK_SZ_BYTES long. Chunks are streamed through NBUFFERS local buffers,
 * each with one DMA tag per input vector.
 *
 * Returns 0. The result is written to the mailbox as the raw bits of a
 * float (retval.i).
 */
int
main(
    unsigned long long spe_id,
    unsigned long long ppu_vector_a,
    unsigned long long ppu_vector_b)
{
    int i, iter, buf_idx, vec_idx;
    unsigned long long ppu_vector_bases[2] _ALIG(128);
    vector float * pchunk_a, * pchunk_b;
    vector float g_vec = {0,0,0,0};

    ppu_vector_bases[0] = ppu_vector_a;
    ppu_vector_bases[1] = ppu_vector_b;

    const unsigned int spu_num = spu_read_in_mbox();
    // Widen before multiplying so the byte offset is not truncated to 32 bits.
    unsigned long long get_edge_bytes = (unsigned long long)spu_num * SUBVEC_SZ_BYTES;

    float buffers[NBUFFERS * BUF_SZ_FLOATS] _ALIG(128);
    int buffer_tags[NBUFFERS][2] _ALIG(128);

    for (iter = 0; iter < NBUFFERS; ++iter) {
        buffer_tags[iter][0] = mfc_tag_reserve();
        buffer_tags[iter][1] = mfc_tag_reserve();
    }

    // Chunks of the slice for which no mfc_get has been issued yet.
    int chunks_unfetched = SUBVEC_SZ_CHUNKS;

    // Prime the pipeline: buffer k receives chunk k. Each buffer must get a
    // DIFFERENT chunk, so the offset advances per buffer, and no more
    // chunks are requested than the slice actually contains.
    for (buf_idx = 0; buf_idx < NBUFFERS && chunks_unfetched > 0; ++buf_idx) {
        for (vec_idx = 0; vec_idx < 2; ++vec_idx) {
            mfc_get(buf_ptr_float(buffers, buf_idx, vec_idx),
                    ppu_vector_bases[vec_idx] + get_edge_bytes,
                    CHUNK_SZ_BYTES,
                    buffer_tags[buf_idx][vec_idx],
                    0, 0);
        }
        get_edge_bytes += CHUNK_SZ_BYTES;
        --chunks_unfetched;
    }

    // Chunks still to be multiplied and accumulated.
    int chunksleft = SUBVEC_SZ_CHUNKS;
    while (chunksleft != 0) {
        for (buf_idx = 0; chunksleft != 0 && buf_idx < NBUFFERS; ++buf_idx) {
            const int tag_mask = (1 << buffer_tags[buf_idx][0])
                                 | (1 << buffer_tags[buf_idx][1]);

            // Block until both halves of this buffer have arrived.
            mfc_write_tag_mask(tag_mask);
            mfc_read_tag_status_all();

            pchunk_a = buf_ptr_vecfloat(buffers, buf_idx, 0);
            pchunk_b = buf_ptr_vecfloat(buffers, buf_idx, 1);

            for (i = 0; i < CHUNK_SZ_FLOATVECS; ++i) {
                g_vec = spu_madd(pchunk_a[i], pchunk_b[i], g_vec);
            }

            // Refill the buffer just consumed with the next unfetched chunk.
            // Stopping when none remain avoids reading past the slice and
            // guarantees no DMA is outstanding when the tags are released.
            if (chunks_unfetched > 0) {
                for (vec_idx = 0; vec_idx < 2; ++vec_idx) {
                    mfc_get(buf_ptr_float(buffers, buf_idx, vec_idx),
                            ppu_vector_bases[vec_idx] + get_edge_bytes,
                            CHUNK_SZ_BYTES,
                            buffer_tags[buf_idx][vec_idx],
                            0, 0);
                }
                get_edge_bytes += CHUNK_SZ_BYTES;
                --chunks_unfetched;
            }
            --chunksleft;
        }
    }

    for (iter = 0; iter < NBUFFERS; ++iter) {
        mfc_tag_release(buffer_tags[iter][0]);
        mfc_tag_release(buffer_tags[iter][1]);
    }

    // Horizontal sum of the four accumulator lanes.
    float_uint_t retval;
    retval.f =
        spu_extract(g_vec, 0) +
        spu_extract(g_vec, 1) +
        spu_extract(g_vec, 2) +
        spu_extract(g_vec, 3);

    // The mailbox carries a 32-bit integer, so send the float's bit pattern.
    spu_write_out_mbox(retval.i);

    return 0;
}