void internal_lessequal(long a, float b) { //printf("Inside internal add\n"); //b must be wrapped in a PyArrayObject PyArrayObject *scalar = (PyArrayObject*)_malloc_align(sizeof(PyArrayObject),7); scalar->blockData = (char**)_malloc_align(sizeof(char*),7); scalar->blockData[0] = (char*)_malloc_align(sizeof(float),7); ((float*)(scalar->blockData[0]))[0] = b; scalar->numberOfBlocks = 1; //Setting the Operation object Operation_t op1; op1.shaderSize = arraylessequal_arrayscalar_size; op1.EA_shader = arraylessequal_arrayscalar; op1.obj[0] = a; op1.obj[1] = scalar; op1.obj[2] = a; //printf("arraymultiply: %d\n",arraydivide); //printf("&arraymultiply: %d\n",&arraydivide); op1.num_SPES = speThreads; unsigned int i = 0; for(i = 0;i<speThreads;i++) { spe_pointer_addr[i][0] = &op1; } //printf("Adr of op1 sat. It is: %x\n",&op1); unsigned int state = 1; unsigned int y; for ( y = 0 ; y < speThreads ; y++ ) { //printf("Sending state to disp\n"); spe_in_mbox_write ( speData[y].spe_ctx, &state, 1, SPE_MBOX_ALL_BLOCKING ); //printf("TO THE DISP. THE SUM STATE: %d, has just been sent\n",&state); //printf("This is the actual value: %d\n",state); } //printf("ADD Waiting for the SPE's\n");//ALSO, check that the structure used is correct! // Waiting for SPEs! //printf( "Waiting for SPEs\n" ); unsigned int checked = 0; unsigned int r; while( checked < speThreads ) { if ( spe_out_mbox_status( speData[checked].spe_ctx ) ) { spe_out_mbox_read( speData[checked].spe_ctx, &r, 1 ); checked++; //printf("Something read on the inbox\n"); } } //printf("Done waiting on threads to finish in LESSEQUAL\n"); }
/*
 * Run with three arrays, no upload.
 *
 * Sends the state word 300 + shaderindex to one SPE (index 0) and then
 * busy-waits until that SPE has posted a word on its outbound mailbox.
 * The word read back is discarded.
 */
void run30nu( unsigned int shaderindex )
{
    const unsigned int spe_count = 1;
    unsigned int command = 300 + shaderindex;
    unsigned int reply;
    unsigned int k;

    printf( "HERE2\n" );

    for ( k = 0 ; k < spe_count ; k++ ) {
        spe_in_mbox_write( speData[k].spe_ctx, &command, 1, SPE_MBOX_ALL_BLOCKING );
    }

    /* Wait for each SPE in index order. */
    for ( k = 0 ; k < spe_count ; k++ ) {
        while ( !spe_out_mbox_status( speData[k].spe_ctx ) ) {
            /* busy-wait */
        }
        spe_out_mbox_read( speData[k].spe_ctx, &reply, 1 );
    }
}
/*
 * Run with one array.
 *
 * Builds an Operation_t from the shader and pyobj1, lets
 * _GetNumberOfSPES() pick the SPE count from pyobj1->numberOfBlocks, then
 * for each participating SPE stores the operation's address in slot 0 of
 * its spe_pointer_addr row and sends state 0. Finally busy-waits until
 * every participating SPE has posted one word on its outbound mailbox.
 */
void run10( PyArrayObject *pyobj1, unsigned int shadersize, unsigned int *shader )
{
    Operation_t op;
    unsigned int command = 0;
    unsigned int reply;
    unsigned int k;

    op.shaderSize = shadersize;
    op.EA_shader = shader;

    // Determining the number of SPEs
    op.num_SPES = _GetNumberOfSPES( pyobj1->numberOfBlocks );
    Printf1( "Using %u SPEs\n", op.num_SPES );

    op.obj[0] = pyobj1;

    for ( k = 0 ; k < op.num_SPES ; k++ ) {
        spe_pointer_addr[k][0] = (unsigned int)&op;
        spe_in_mbox_write( speData[k].spe_ctx, &command, 1, SPE_MBOX_ALL_BLOCKING );
    }

    // Waiting for SPEs, in index order
    for ( k = 0 ; k < op.num_SPES ; k++ ) {
        while ( !spe_out_mbox_status( speData[k].spe_ctx ) ) {
            /* busy-wait */
        }
        spe_out_mbox_read( speData[k].spe_ctx, &reply, 1 );
    }
}
/*
 * Return the last known status of thread `id`.
 *
 * If the thread's outbound mailbox reports at least one pending entry, one
 * word is read into threads[id].status first; otherwise the previously
 * stored status is returned unchanged. Never blocks.
 */
int spe_get_status(int id)
{
    /* Nothing pending (or status query failed): report the cached value. */
    if (spe_out_mbox_status(threads[id].speid) <= 0) {
        return threads[id].status;
    }

    spe_out_mbox_read(threads[id].speid, &(threads[id].status), 1);
    return threads[id].status;
}
/*
 * Thread entry point: receive ten mailbox messages from one SPE and print
 * each of them.
 *
 * thread_arg: pointer to the spe_context_ptr_t of the SPE to listen to.
 * Returns NULL.
 */
void *mailbox_pthread_function(void *thread_arg)
{
    spe_context_ptr_t ctx = *(spe_context_ptr_t *)thread_arg;
    unsigned int mbox_data = 0;
    int i;

    /* Read ten messages from the SPE. */
    for (i = 0; i < 10; i++) {
        /*
         * TODO(2): read, through the out_intr mailbox, the stage completed
         * by the SPE with context ctx.
         * NOTE(review): the code below polls the regular outbound mailbox
         * (spe_out_mbox_*), not the interrupting one -- confirm which one
         * the SPU side writes to.
         */
        while (spe_out_mbox_status(ctx) == 0) {
            /* busy-wait until a message is available */
        }
        spe_out_mbox_read(ctx, &mbox_data, 1);

        /* %u matches the unsigned payload; %p requires a void pointer. */
        printf("[PPU]: SPU %p a terminat etapa %u\n", (void *)ctx, mbox_data);
    }

    return NULL;
}
/*
 * Block until the SPE described by spe_data posts a word on its outbound
 * mailbox and compare that word with the expected message.
 *
 * spe_data: SPE whose context (ctx) is polled; program_name is used for
 *           the debug output only.
 * msg:      message value the caller expects.
 *
 * Returns 0 when the received word equals msg, and -1 when it differs or
 * when the mailbox read did not deliver exactly one word.
 */
int SPE_WaitForMsg(_THIS, spu_data_t * spe_data, unsigned int msg)
{
    deprintf(2, "[PS3->SPU] Waiting for message from %s\n", spe_data->program_name);

    /* Zero-initialised so a failed read can never expose stack garbage. */
    unsigned int out_messages[1] = { 0 };

    /* Busy-wait until the status call reports something other than 0. */
    while (!spe_out_mbox_status(spe_data->ctx)) {
        /* spin */
    }

    /*
     * The status call also returns non-zero on error, so the read may fail;
     * treat anything but one delivered word as "expected message not seen".
     */
    int mbox_read = spe_out_mbox_read(spe_data->ctx, out_messages, 1);
    if (mbox_read != 1) {
        return -1;
    }

    deprintf(2, "[PS3->SPU] Got message from %s, message was %u\n", spe_data->program_name, out_messages[0]);

    return (out_messages[0] == msg) ? 0 : -1;
}
void internal_div(long a, long b) { //printf("Inside internal add\n"); //Setting the Operation object Operation_t op1; op1.shaderSize = arraydivide_size; op1.EA_shader = arraydivide; op1.obj[0] = a; op1.obj[1] = b; op1.obj[2] = a; //printf("arraymultiply: %d\n",arraydivide); //printf("&arraymultiply: %d\n",&arraydivide); op1.num_SPES = speThreads; unsigned int i = 0; for(i = 0;i<speThreads;i++) { spe_pointer_addr[i][0] = &op1; } //printf("Adr of op1 sat. It is: %x\n",&op1); unsigned int state = 1; unsigned int y; for ( y = 0 ; y < speThreads ; y++ ) { //printf("Sending state to disp\n"); spe_in_mbox_write ( speData[y].spe_ctx, &state, 1, SPE_MBOX_ALL_BLOCKING ); //printf("TO THE DISP. THE SUM STATE: %d, has just been sent\n",&state); //printf("This is the actual value: %d\n",state); } //printf("ADD Waiting for the SPE's\n");//ALSO, check that the structure used is correct! // Waiting for SPEs! //printf( "Waiting for SPEs\n" ); unsigned int checked = 0; unsigned int r; while( checked < speThreads ) { if ( spe_out_mbox_status( speData[checked].spe_ctx ) ) { spe_out_mbox_read( speData[checked].spe_ctx, &r, 1 ); checked++; //printf("Something read on the inbox\n"); } } //printf("Done waiting on threads to finish in DIV\n"); }
/*
 * Run with one array, with return value.
 *
 * Each participating SPE receives state 100 followed by the address of its
 * resultFloats[] entry. After every SPE has posted a word on its outbound
 * mailbox, the first float of each of those entries is summed and the sum
 * is returned.
 */
float run10r( PyArrayObject *pyobj1, unsigned int shadersize, unsigned int *shader )
{
    Operation_t op;
    unsigned int command = 100;
    unsigned int word;
    unsigned int k;
    float total = 0.0f;

    op.shaderSize = shadersize;
    op.EA_shader = shader;

    // Determining the number of SPEs
    op.num_SPES = _GetNumberOfSPES( pyobj1->numberOfBlocks );
    op.obj[0] = pyobj1;

    for ( k = 0 ; k < op.num_SPES ; k++ ) {
        word = (unsigned int)resultFloats[k];
        spe_pointer_addr[k][0] = (unsigned int)&op;
        spe_in_mbox_write( speData[k].spe_ctx, &command, 1, SPE_MBOX_ALL_BLOCKING );
        spe_in_mbox_write( speData[k].spe_ctx, &word, 1, SPE_MBOX_ALL_BLOCKING );
    }

    // Waiting for SPEs, in index order
    for ( k = 0 ; k < op.num_SPES ; k++ ) {
        while ( !spe_out_mbox_status( speData[k].spe_ctx ) ) {
            /* busy-wait */
        }
        spe_out_mbox_read( speData[k].spe_ctx, &word, 1 );
    }

    // Combine the per-SPE partial results
    for ( k = 0 ; k < op.num_SPES ; k++ ) {
        total += resultFloats[k][0];
    }

    return total;
}
/*
 * Run the given shader on all speThreads SPEs with state 2, sum the first
 * float each SPE leaves in its private result buffer, print the square
 * root of that sum, and return the elapsed time.
 *
 * pyobj1:     array stored in operand slot 0 of the operation.
 * shadersize: stored in the operation's shaderSize field.
 * shader:     stored in the operation's EA_shader field.
 *
 * Returns the value of Toc_d() taken after the reduction, or -1.0 when
 * speThreads exceeds the number of result slots or a result buffer could
 * not be allocated (in which case nothing is sent to the SPEs).
 */
double SNRM2( PyArrayObject *pyobj1, unsigned int shadersize, unsigned int *shader )
{
    Tic();

    float *results[6];
    const unsigned int max_spes = (unsigned int)( sizeof results / sizeof results[0] );
    unsigned int i;

    /* results[] is indexed by SPE number; refuse to run past its end. */
    if ( speThreads > max_spes ) {
        printf( "SNRM2: speThreads exceeds the %u available result slots\n", max_spes );
        return -1.0;
    }

    /*
     * Allocate every result buffer before signalling any SPE so that an
     * allocation failure can be handled without a half-started operation.
     */
    for ( i = 0 ; i < speThreads ; i++ ) {
        results[i] = (float *)memalign( 128, ( 4 + 127 ) & ~127 );
        if ( results[i] == NULL ) {
            printf( "SNRM2: could not allocate a result buffer\n" );
            while ( i > 0 ) {
                free( results[--i] );
            }
            return -1.0;
        }
    }

    Operation_t op1;
    op1.shaderSize = shadersize;
    op1.EA_shader = shader;
    op1.obj[0] = pyobj1;
    op1.num_SPES = speThreads;

    unsigned int state = 2, r;

    /* Each SPE gets the state word followed by its result buffer address. */
    for ( i = 0 ; i < speThreads ; i++ ) {
        r = (unsigned int)results[i];
        spe_pointer_addr[i][0] = (unsigned int)&op1;
        spe_in_mbox_write ( speData[i].spe_ctx, &state, 1, SPE_MBOX_ALL_BLOCKING );
        spe_in_mbox_write ( speData[i].spe_ctx, &r, 1, SPE_MBOX_ALL_BLOCKING );
    }

    // Waiting for SPEs!
    unsigned int checked = 0;
    while( checked < speThreads ) {
        if ( spe_out_mbox_status( speData[checked].spe_ctx ) ) {
            spe_out_mbox_read( speData[checked].spe_ctx, &r, 1 );
            checked++;
        }
    }

    /* Reduce the partial results and release the buffers. */
    float r1 = 0.0f;
    for ( i = 0 ; i < speThreads ; i++ ) {
        r1 += results[i][0];
        free( results[i] );
    }
    r1 = sqrtf( r1 );

    double time = Toc_d();
    PrintTicToc( "Finished at ", Toc() );
    printf( "Result=%f\n", r1 );

    return time;
}
/*
 * Run the given shader on all speThreads SPEs with state 1, using pyobj1
 * as operand 0 and pyscalar1 as scalar 0, then print the first three
 * floats of pyobj1's first block.
 *
 * Returns the value of Toc_d() taken once every SPE has posted a word on
 * its outbound mailbox.
 */
double SSCAL( PyArrayObject *pyobj1, PyArrayObject *pyscalar1, unsigned int shadersize, unsigned int *shader )
{
    Tic();

    Operation_t op;
    unsigned int command = 1;
    unsigned int reply;
    unsigned int k;

    op.shaderSize = shadersize;
    op.EA_shader = shader;
    op.obj[0] = pyobj1;
    op.scalar[0] = pyscalar1;
    op.num_SPES = speThreads;

    for ( k = 0 ; k < speThreads ; k++ ) {
        spe_pointer_addr[k][0] = (unsigned int)&op;
        spe_in_mbox_write( speData[k].spe_ctx, &command, 1, SPE_MBOX_ALL_BLOCKING );
    }

    // Waiting for SPEs, in index order
    for ( k = 0 ; k < speThreads ; k++ ) {
        while ( !spe_out_mbox_status( speData[k].spe_ctx ) ) {
            /* busy-wait */
        }
        spe_out_mbox_read( speData[k].spe_ctx, &reply, 1 );
    }

    double elapsed = Toc_d();
    PrintTicToc( "Finished at ", Toc() );

    // Show the first three elements of the first block
    float *first_block = (float*)pyobj1->blockData[0];
    for ( k = 0 ; k < 3 ; k++ ) {
        printf( "%u=%f\n", k, first_block[k] );
    }

    return elapsed;
}
/*
 * Run with three arrays, no run.
 *
 * Builds an Operation_t for the three arrays with a fixed SPE count of 1,
 * stores its address in slot `shaderindex` of the SPE's spe_pointer_addr
 * row, sends the state word 200 + shaderindex, and busy-waits until the
 * SPE has posted a word on its outbound mailbox.
 */
void run30nr( PyArrayObject *pyobj1, PyArrayObject *pyobj2, PyArrayObject *pyobj3, unsigned int shadersize, unsigned int *shader, unsigned int shaderindex )
{
    Operation_t op;
    unsigned int command = 200 + shaderindex;
    unsigned int reply;
    unsigned int k;

    op.shaderSize = shadersize;
    op.EA_shader = shader;

    // SPE count is fixed rather than derived from the block count
    //op1.num_SPES = _GetNumberOfSPES( pyobj1->numberOfBlocks );
    op.num_SPES = 1;

    op.obj[0] = pyobj1;
    op.obj[1] = pyobj2;
    op.obj[2] = pyobj3;

    for ( k = 0 ; k < op.num_SPES ; k++ ) {
        spe_pointer_addr[k][shaderindex] = (unsigned int)&op;
        spe_in_mbox_write( speData[k].spe_ctx, &command, 1, SPE_MBOX_ALL_BLOCKING );
    }

    // Waiting for SPEs, in index order
    for ( k = 0 ; k < op.num_SPES ; k++ ) {
        while ( !spe_out_mbox_status( speData[k].spe_ctx ) ) {
            /* busy-wait */
        }
        spe_out_mbox_read( speData[k].spe_ctx, &reply, 1 );
    }
}
/*
 * Run with two arrays and two scalars.
 *
 * A and x go into operand slots 0 and 1, alpha and j into scalar slots 0
 * and 1. The SPE count comes from _GetNumberOfSPES( A->numberOfBlocks ).
 * Each participating SPE receives the operation's address (slot 0 of its
 * spe_pointer_addr row) and state 0; the function then busy-waits until
 * every one of them has posted a word on its outbound mailbox.
 */
void run22( PyArrayObject *A, PyArrayObject *x, PyArrayObject *alpha, PyArrayObject *j, unsigned int shadersize, unsigned int *shader )
{
    Operation_t op;
    unsigned int command = 0;
    unsigned int reply;
    unsigned int k;

    op.shaderSize = shadersize;
    op.EA_shader = shader;

    // Determining the number of SPEs
    op.num_SPES = _GetNumberOfSPES( A->numberOfBlocks );

    op.obj[0] = A;
    op.obj[1] = x;
    op.scalar[0] = alpha;
    op.scalar[1] = j;

    for ( k = 0 ; k < op.num_SPES ; k++ ) {
        spe_pointer_addr[k][0] = (unsigned int)&op;
        spe_in_mbox_write( speData[k].spe_ctx, &command, 1, SPE_MBOX_ALL_BLOCKING );
    }

    // Waiting for SPEs, in index order
    for ( k = 0 ; k < op.num_SPES ; k++ ) {
        while ( !spe_out_mbox_status( speData[k].spe_ctx ) ) {
            /* busy-wait */
        }
        spe_out_mbox_read( speData[k].spe_ctx, &reply, 1 );
    }
}
unsigned int internal_sum(long a) { //printf("Inside internal add\n"); //Setting the Operation object Operation_t op1; op1.shaderSize = arraysum_size; op1.EA_shader = arraysum; op1.obj[0] = a; //printf("arraymultiply: %d\n",arraydivide); //printf("&arraymultiply: %d\n",&arraydivide); op1.num_SPES = speThreads; unsigned int i = 0; for(i = 0;i<speThreads;i++) { spe_pointer_addr[i][0] = &op1; } //printf("Adr of op1 sat. It is: %x\n",&op1); unsigned int state = 2; unsigned int y; for ( y = 0 ; y < speThreads ; y++ ) { //printf("Sending state to disp\n"); spe_in_mbox_write ( speData[y].spe_ctx, &state, 1, SPE_MBOX_ALL_BLOCKING ); //printf("TO THE DISP. THE SUM STATE: %d, has just been sent\n",&state); //printf("This is the actual value: %d\n",state); } //printf("ADD Waiting for the SPE's\n");//ALSO, check that the structure used is correct! // Waiting for SPEs! //printf( "Waiting for SPEs\n" ); unsigned int checked = 0; unsigned int signal; //float r; //printf("1. r[0]: %f\n",resultFloats[0][0]); //printf("1. adr is: %x\n",&(resultFloats[0][0])); unsigned int pointersToResultFloats[speThreads]; unsigned int e; for(e=0;e<speThreads;e++) { pointersToResultFloats[e] = &(resultFloats[e][0]); } for ( y = 0 ; y < speThreads ; y++ ) { //printf("Sending res pointers to disp\n"); spe_in_mbox_write ( speData[y].spe_ctx, &pointersToResultFloats[y], 1, SPE_MBOX_ALL_BLOCKING ); //printf("TO THE DISP. THE SUM STATE: %d, has just been sent\n",&state); //printf("This is the actual value: %d\n",state); } float finalRes = 0; while( checked < speThreads ) { if ( spe_out_mbox_status( speData[checked].spe_ctx ) ) { spe_out_mbox_read( speData[checked].spe_ctx, &signal, 1 ); checked++; //printf("Something read on the inbox\n"); } } //printf("Done waiting on threads to finish in DIV\n"); //printf("2. r[0]: %f\n",resultFloats[y][0]); //Add all the results together unsigned int u = 0; for(u=0;u<speThreads;u++) { finalRes = finalRes+resultFloats[u][0]; } return finalRes; }
/*
 * Dispatch the random-filler shader to every SPE thread with `a` as
 * operand 0, and wait until each SPE has answered on its outbound mailbox.
 *
 * a: array stored in operand slot 0 of the operation. This function does
 *    not allocate or initialise it.
 */
void internal_random(PyArrayObject *a)
{
    /* Describe the operation for the SPEs. */
    Operation_t op;
    op.shaderSize = randomfiller_size;
    op.EA_shader = randomfiller;
    op.obj[0] = a;
    op.num_SPES = speThreads;

    /* Publish the operation's address to every SPE before any is signalled. */
    unsigned int spe;
    for (spe = 0; spe < speThreads; spe++) {
        spe_pointer_addr[spe][0] = &op;
    }

    /* Send state 1 to each SPE's inbound mailbox. */
    unsigned int command = 1;
    for (spe = 0; spe < speThreads; spe++) {
        spe_in_mbox_write(speData[spe].spe_ctx, &command, 1, SPE_MBOX_ALL_BLOCKING);
    }

    /* Poll the SPEs in index order; each must post one word before moving on. */
    unsigned int reply;
    for (spe = 0; spe < speThreads; spe++) {
        while (!spe_out_mbox_status(speData[spe].spe_ctx)) {
            /* busy-wait */
        }
        spe_out_mbox_read(speData[spe].spe_ctx, &reply, 1);
    }
}
/*
 * Two-phase run of the given shader on all speThreads SPEs: a setup phase
 * (state 4 plus the address of a per-SPE result buffer) followed by a
 * timed run phase (state 6). The first float of every result buffer is
 * summed and printed.
 *
 * pyobj1, pyobj2: arrays stored in operand slots 0 and 1 of the operation.
 * shadersize:     stored in the operation's shaderSize field.
 * shader:         stored in the operation's EA_shader field.
 *
 * Returns the value of Toc_d() for the run phase only (Tic() is called
 * after setup has completed), or -1.0 when speThreads exceeds the number
 * of result slots or a result buffer could not be allocated (in which case
 * nothing is sent to the SPEs).
 */
double SDOT2( PyArrayObject *pyobj1, PyArrayObject *pyobj2, unsigned int shadersize, unsigned int *shader )
{
    float *results[6];
    const unsigned int max_spes = (unsigned int)( sizeof results / sizeof results[0] );
    unsigned int i, r;

    /* results[] is indexed by SPE number; refuse to run past its end. */
    if ( speThreads > max_spes ) {
        printf( "SDOT2: speThreads exceeds the %u available result slots\n", max_spes );
        return -1.0;
    }

    /*
     * Allocate every result buffer before signalling any SPE so that an
     * allocation failure can be handled without a half-started operation.
     */
    for ( i = 0 ; i < speThreads ; i++ ) {
        results[i] = (float *)memalign( 128, ( 4 + 127 ) & ~127 );
        if ( results[i] == NULL ) {
            printf( "SDOT2: could not allocate a result buffer\n" );
            while ( i > 0 ) {
                free( results[--i] );
            }
            return -1.0;
        }
    }

    Operation_t op1;
    op1.shaderSize = shadersize;
    op1.EA_shader = shader;
    op1.obj[0] = pyobj1;
    op1.obj[1] = pyobj2;
    op1.num_SPES = speThreads;

    unsigned int state = 4;
    unsigned int checked = 0;

    // Setup SPEs: state word followed by the result buffer address
    for ( i = 0 ; i < speThreads ; i++ ) {
        r = (unsigned int)results[i];
        spe_pointer_addr[i][0] = (unsigned int)&op1;
        spe_in_mbox_write ( speData[i].spe_ctx, &state, 1, SPE_MBOX_ALL_BLOCKING );
        spe_in_mbox_write ( speData[i].spe_ctx, &r, 1, SPE_MBOX_ALL_BLOCKING );
    }

    // Waiting for SPEs to acknowledge setup
    while( checked < speThreads ) {
        if ( spe_out_mbox_status( speData[checked].spe_ctx ) ) {
            spe_out_mbox_read( speData[checked].spe_ctx, &r, 1 );
            checked++;
        }
    }

    Tic();

    // Make SPEs run
    state = 6;
    for ( i = 0 ; i < speThreads ; i++ ) {
        spe_in_mbox_write ( speData[i].spe_ctx, &state, 1, SPE_MBOX_ALL_BLOCKING );
    }

    // Waiting for SPEs to finish the run
    checked = 0;
    while( checked < speThreads ) {
        if ( spe_out_mbox_status( speData[checked].spe_ctx ) ) {
            spe_out_mbox_read( speData[checked].spe_ctx, &r, 1 );
            checked++;
        }
    }

    // Get result and release the buffers
    float r1 = 0.0f;
    for ( i = 0 ; i < speThreads ; i++ ) {
        r1 += results[i][0];
        free( results[i] );
    }

    double time = Toc_d();
    PrintTicToc( "Finished at ", Toc() );
    printf( "Result=%f\n", r1 );

    return time;
}