void internal_lessequal(long a, float b) { //printf("Inside internal add\n"); //b must be wrapped in a PyArrayObject PyArrayObject *scalar = (PyArrayObject*)_malloc_align(sizeof(PyArrayObject),7); scalar->blockData = (char**)_malloc_align(sizeof(char*),7); scalar->blockData[0] = (char*)_malloc_align(sizeof(float),7); ((float*)(scalar->blockData[0]))[0] = b; scalar->numberOfBlocks = 1; //Setting the Operation object Operation_t op1; op1.shaderSize = arraylessequal_arrayscalar_size; op1.EA_shader = arraylessequal_arrayscalar; op1.obj[0] = a; op1.obj[1] = scalar; op1.obj[2] = a; //printf("arraymultiply: %d\n",arraydivide); //printf("&arraymultiply: %d\n",&arraydivide); op1.num_SPES = speThreads; unsigned int i = 0; for(i = 0;i<speThreads;i++) { spe_pointer_addr[i][0] = &op1; } //printf("Adr of op1 sat. It is: %x\n",&op1); unsigned int state = 1; unsigned int y; for ( y = 0 ; y < speThreads ; y++ ) { //printf("Sending state to disp\n"); spe_in_mbox_write ( speData[y].spe_ctx, &state, 1, SPE_MBOX_ALL_BLOCKING ); //printf("TO THE DISP. THE SUM STATE: %d, has just been sent\n",&state); //printf("This is the actual value: %d\n",state); } //printf("ADD Waiting for the SPE's\n");//ALSO, check that the structure used is correct! // Waiting for SPEs! //printf( "Waiting for SPEs\n" ); unsigned int checked = 0; unsigned int r; while( checked < speThreads ) { if ( spe_out_mbox_status( speData[checked].spe_ctx ) ) { spe_out_mbox_read( speData[checked].spe_ctx, &r, 1 ); checked++; //printf("Something read on the inbox\n"); } } //printf("Done waiting on threads to finish in LESSEQUAL\n"); }
void handleFakeTrapInt (int chosenSpu) { unsigned int message; spe_out_mbox_read(global_spu_data->spus[chosenSpu].ctx, &(message), 1); pthread_mutex_lock(&fakeTrapLock); sysConsoleWriteChar('F'); sysConsoleWriteChar('T'); sysConsoleWriteChar('['); sysConsoleWriteInteger(chosenSpu, 0); sysConsoleWriteChar(']'); sysConsoleWriteChar(':'); sysConsoleWriteChar('>'); sysConsoleWriteChar(' '); sysConsoleWriteInteger((int) message, 1); sysConsoleWriteChar('\n'); pthread_mutex_unlock(&fakeTrapLock); // send back ACK message = ACK; if (spe_in_mbox_write(global_spu_data->spus[chosenSpu].ctx, &(message), 1, SPE_MBOX_ANY_NONBLOCKING) < 0) { fprintf(stderr, "Error writing ack for console write message\n"); exit(1); } }
/*
 * Three-array run without upload.
 *
 * Prints "HERE2", sends state word (300 + shaderindex) to the first SPE
 * (the SPE count is fixed at 1 here), then polls that SPE's outbound
 * mailbox until one word has been read.
 */
void run30nu( unsigned int shaderindex )
{
    const unsigned int spe_count = 1;
    unsigned int command = 300 + shaderindex;
    unsigned int reply;
    unsigned int spe;
    unsigned int done;

    printf( "HERE2\n" );

    for ( spe = 0 ; spe < spe_count ; spe++ ) {
        spe_in_mbox_write( speData[spe].spe_ctx, &command, 1, SPE_MBOX_ALL_BLOCKING );
    }

    /* Wait for one reply word per SPE, in index order */
    for ( done = 0 ; done < spe_count ; ) {
        if ( !spe_out_mbox_status( speData[done].spe_ctx ) ) {
            continue;
        }
        spe_out_mbox_read( speData[done].spe_ctx, &reply, 1 );
        done++;
    }
}
void handleFakeTrap (int chosenSpu) { unsigned int message; char * str; int i, length; spe_out_mbox_read(global_spu_data->spus[chosenSpu].ctx, &(message), 1); pthread_mutex_lock(&fakeTrapLock); sysConsoleWriteChar('F'); sysConsoleWriteChar('T'); sysConsoleWriteChar('['); sysConsoleWriteInteger(chosenSpu, 0); sysConsoleWriteChar(']'); sysConsoleWriteChar(':'); sysConsoleWriteChar('>'); sysConsoleWriteChar(' '); str = (char *) (global_spu_data->boot_record->fakeTrapStrs[message]); length = *((int *) (str - 4)); for (i=0; i<length; i++) { sysConsoleWriteChar(str[i]); } sysConsoleWriteChar('\n'); pthread_mutex_unlock(&fakeTrapLock); // send back ACK message = ACK; if (spe_in_mbox_write(global_spu_data->spus[chosenSpu].ctx, &(message), 1, SPE_MBOX_ANY_NONBLOCKING) < 0) { fprintf(stderr, "Error writing ack for console write message\n"); exit(1); } }
/*
 * Tell SPU chosenSpu to run the migrated method and wait for its ACK.
 *
 * Writes runMethodSignal to the SPU's inbound mailbox, then blocks on the
 * interrupting outbound mailbox for one reply word. If that word is not
 * ACK, one further word is read from the ordinary outbound mailbox and
 * reported as the error number. Every failure path exits with status 1.
 */
extern "C" void runMigratedMethod(SpuThreadData * spu_data, int chosenSpu, int runMethodSignal)
{
    unsigned int word = (unsigned int) runMethodSignal;
    unsigned int errorNo;

    /* signal spu to run method */
    if (spe_in_mbox_write(spu_data->spus[chosenSpu].ctx, &word, 1, SPE_MBOX_ANY_NONBLOCKING) < 0) {
        perror("Failed while trying to signal method details to Cell SPU");
        exit(1);
    }

    /* check we get an ACK back */
    if (spe_out_intr_mbox_read(spu_data->spus[chosenSpu].ctx, &word, 1, SPE_MBOX_ALL_BLOCKING) < 0) {
        perror("Failed reading SPU mailbox while awaiting SPU method invocation");
        exit(1);
    }

    if (word == ACK) {
        return;
    }

    /* method run was not acked: read error signal */
    spe_out_mbox_read(spu_data->spus[chosenSpu].ctx, &errorNo, 1);
    fprintf(stderr, "SPU did not ACK method run signal, signaled 0x%x, returned error no. 0x%x\n", word, errorNo);
    exit(1);
}
/*
 * Run a shader over one array.
 *
 * The number of SPEs comes from _GetNumberOfSPES(pyobj1->numberOfBlocks).
 * For each of them the operation's address is stored in slot 0 of
 * spe_pointer_addr and state word 0 is sent; the function then polls each
 * SPE's outbound mailbox in index order until one word has been read.
 */
void run10( PyArrayObject *pyobj1, unsigned int shadersize, unsigned int *shader )
{
    Operation_t op;
    unsigned int command = 0;
    unsigned int reply;
    unsigned int spe;
    unsigned int done;

    op.shaderSize = shadersize;
    op.EA_shader = shader;

    /* Determining the number of SPEs */
    op.num_SPES = _GetNumberOfSPES( pyobj1->numberOfBlocks );
    Printf1( "Using %u SPEs\n", op.num_SPES );

    op.obj[0] = pyobj1;

    for ( spe = 0 ; spe < op.num_SPES ; spe++ ) {
        spe_pointer_addr[spe][0] = (unsigned int)&op;
        spe_in_mbox_write( speData[spe].spe_ctx, &command, 1, SPE_MBOX_ALL_BLOCKING );
    }

    /* Wait for one reply word per SPE, in index order */
    for ( done = 0 ; done < op.num_SPES ; ) {
        if ( !spe_out_mbox_status( speData[done].spe_ctx ) ) {
            continue;
        }
        spe_out_mbox_read( speData[done].spe_ctx, &reply, 1 );
        done++;
    }
}
/*
 * Report a trap raised by SPU chosenSpu and terminate the process.
 *
 * Reads the trap value (one word) from the SPU's outbound mailbox, prints
 * it to stderr and exits with status 1. If the mailbox read itself reports
 * an error, a separate message is printed instead, so the value shown is
 * never an uninitialized word.
 */
void handleTrap (int chosenSpu)
{
    unsigned int message = 0;

    if (spe_out_mbox_read(global_spu_data->spus[chosenSpu].ctx, &(message), 1) < 0) {
        fprintf(stderr, "Error, SPU %i trapped and its trap value could not be read\n", chosenSpu);
        exit(1);
    }

    fprintf(stderr, "Error, SPU %i trapped with value 0x%x\n", chosenSpu, message);
    exit(1);
}
/*
 * Return the status recorded for thread `id`.
 *
 * If the thread's outbound mailbox reports pending data, one word is read
 * into threads[id].status first; otherwise the stored value is returned.
 */
int spe_get_status(int id)
{
    if (spe_out_mbox_status(threads[id].speid) <= 0) {
        return threads[id].status;
    }

    spe_out_mbox_read(threads[id].speid, &(threads[id].status), 1);
    return threads[id].status;
}
/*
 * Thread entry point: receive ten stage notifications from one SPE.
 *
 * thread_arg points to the spe_context_ptr_t of the SPE to listen to.
 * For each of ten messages the function spins until the outbound mailbox
 * has data, reads one word and prints it. Always returns NULL.
 */
void *mailbox_pthread_function(void *thread_arg)
{
    spe_context_ptr_t ctx = *(spe_context_ptr_t *)thread_arg;
    unsigned int stage = 0;
    int remaining;

    /* read ten messages from the SPE */
    for (remaining = 10; remaining > 0; remaining--) {
        /* TODO(2): read, through the out_intr mailbox, the stage finished
         * by the SPE with context ctx */
        while (spe_out_mbox_status(ctx) == 0) {
            /* spin until a word is available */
        }
        spe_out_mbox_read(ctx, &stage, 1);
        printf("[PPU]: SPU %p a terminat etapa %d\n", ctx, stage);
    }

    return NULL;
}
/*
 * Wait for the next outbound-mailbox word from an SPE program and compare
 * it with an expected message.
 *
 * spe_data: SPE whose ctx is polled; program_name is used for logging.
 * msg:      the word the caller expects.
 *
 * Spins until spe_out_mbox_status() is non-zero, then reads one word.
 * Returns 0 if the word equals msg, -1 otherwise. Also returns -1 if the
 * read did not deliver exactly one word, so an unread buffer is never
 * compared against msg.
 */
int SPE_WaitForMsg(_THIS, spu_data_t * spe_data, unsigned int msg)
{
    unsigned int out_messages[1] = { 0 };
    int mbox_read;

    deprintf(2, "[PS3->SPU] Waiting for message from %s\n", spe_data->program_name);

    while (!spe_out_mbox_status(spe_data->ctx)) {
        /* spin until the mailbox reports data (or an error) */
    }

    mbox_read = spe_out_mbox_read(spe_data->ctx, out_messages, 1);
    if (mbox_read != 1) {
        deprintf(2, "[PS3->SPU] Failed to read message from %s\n", spe_data->program_name);
        return -1;
    }

    deprintf(2, "[PS3->SPU] Got message from %s, message was %u\n", spe_data->program_name, out_messages[0]);

    return (out_messages[0] == msg) ? 0 : -1;
}
void internal_div(long a, long b) { //printf("Inside internal add\n"); //Setting the Operation object Operation_t op1; op1.shaderSize = arraydivide_size; op1.EA_shader = arraydivide; op1.obj[0] = a; op1.obj[1] = b; op1.obj[2] = a; //printf("arraymultiply: %d\n",arraydivide); //printf("&arraymultiply: %d\n",&arraydivide); op1.num_SPES = speThreads; unsigned int i = 0; for(i = 0;i<speThreads;i++) { spe_pointer_addr[i][0] = &op1; } //printf("Adr of op1 sat. It is: %x\n",&op1); unsigned int state = 1; unsigned int y; for ( y = 0 ; y < speThreads ; y++ ) { //printf("Sending state to disp\n"); spe_in_mbox_write ( speData[y].spe_ctx, &state, 1, SPE_MBOX_ALL_BLOCKING ); //printf("TO THE DISP. THE SUM STATE: %d, has just been sent\n",&state); //printf("This is the actual value: %d\n",state); } //printf("ADD Waiting for the SPE's\n");//ALSO, check that the structure used is correct! // Waiting for SPEs! //printf( "Waiting for SPEs\n" ); unsigned int checked = 0; unsigned int r; while( checked < speThreads ) { if ( spe_out_mbox_status( speData[checked].spe_ctx ) ) { spe_out_mbox_read( speData[checked].spe_ctx, &r, 1 ); checked++; //printf("Something read on the inbox\n"); } } //printf("Done waiting on threads to finish in DIV\n"); }
/*
 * Timed run of a shader that yields one float per SPE; prints the square
 * root of the sum of those floats.
 *
 * For each SPE thread a 128-byte-aligned result buffer is allocated, the
 * operation's address is published in slot 0, and two words are sent:
 * state 2 and the buffer address. After one reply word has been read from
 * every SPE, the first float of each buffer is summed and the buffer freed.
 *
 * Returns the elapsed time reported by Toc_d().
 *
 * NOTE(review): the buffer table has 6 entries while the loops run to
 * speThreads, and the memalign() result is not checked.
 */
double SNRM2( PyArrayObject *pyobj1, unsigned int shadersize, unsigned int *shader )
{
    Tic();

    Operation_t op;
    op.shaderSize = shadersize;
    op.EA_shader = shader;
    op.obj[0] = pyobj1;
    op.num_SPES = speThreads;

    float *partial[6];
    unsigned int command = 2;
    unsigned int word;
    unsigned int spe;

    for ( spe = 0 ; spe < speThreads ; spe++ ) {
        partial[spe] = (float *)memalign( 128, ( 4 + 127 ) & ~127 );
        word = (unsigned int)partial[spe];
        spe_pointer_addr[spe][0] = (unsigned int)&op;
        spe_in_mbox_write( speData[spe].spe_ctx, &command, 1, SPE_MBOX_ALL_BLOCKING );
        spe_in_mbox_write( speData[spe].spe_ctx, &word, 1, SPE_MBOX_ALL_BLOCKING );
    }

    /* Wait for one reply word per SPE, in index order */
    unsigned int done;
    for ( done = 0 ; done < speThreads ; ) {
        if ( !spe_out_mbox_status( speData[done].spe_ctx ) ) {
            continue;
        }
        spe_out_mbox_read( speData[done].spe_ctx, &word, 1 );
        done++;
    }

    /* Accumulate the per-SPE results and release their buffers */
    float total = 0.0f;
    for ( spe = 0 ; spe < speThreads ; spe++ ) {
        total += partial[spe][0];
        free( partial[spe] );
    }
    total = sqrtf( total );

    double elapsed = Toc_d();
    PrintTicToc( "Finished at ", Toc() );
    printf( "Result=%f\n", total );

    return elapsed;
}
/*
 * Run a shader over one array and return the sum of the per-SPE results.
 *
 * The number of SPEs comes from _GetNumberOfSPES(pyobj1->numberOfBlocks).
 * Each SPE is sent state word 100 followed by the address of its entry in
 * resultFloats. After one reply word has been read from every SPE, the
 * first float of each used resultFloats entry is added up and returned.
 */
float run10r( PyArrayObject *pyobj1, unsigned int shadersize, unsigned int *shader )
{
    Operation_t op;
    unsigned int command = 100;
    unsigned int word;
    unsigned int spe;
    unsigned int done;
    float total;

    op.shaderSize = shadersize;
    op.EA_shader = shader;

    /* Determining the number of SPEs */
    op.num_SPES = _GetNumberOfSPES( pyobj1->numberOfBlocks );
    op.obj[0] = pyobj1;

    for ( spe = 0 ; spe < op.num_SPES ; spe++ ) {
        word = (unsigned int)resultFloats[spe];
        spe_pointer_addr[spe][0] = (unsigned int)&op;
        spe_in_mbox_write( speData[spe].spe_ctx, &command, 1, SPE_MBOX_ALL_BLOCKING );
        spe_in_mbox_write( speData[spe].spe_ctx, &word, 1, SPE_MBOX_ALL_BLOCKING );
    }

    /* Wait for one reply word per SPE, in index order */
    for ( done = 0 ; done < op.num_SPES ; ) {
        if ( !spe_out_mbox_status( speData[done].spe_ctx ) ) {
            continue;
        }
        spe_out_mbox_read( speData[done].spe_ctx, &word, 1 );
        done++;
    }

    total = 0.0f;
    for ( spe = 0 ; spe < op.num_SPES ; spe++ ) {
        total += resultFloats[spe][0];
    }

    return total;
}
/**
 * Wait for a 1-word message to arrive in the given context's outbound
 * mailbox and return it.
 *
 * The read is retried in a tight loop until it reports exactly one word.
 * A negative (error) result is not handled: the loop simply tries again.
 */
uint wait_mbox_message(spe_context_ptr_t ctx)
{
    for (;;) {
        unsigned word;

        if (spe_out_mbox_read(ctx, &word, 1) == 1) {
            return word;
        }
        /* zero or negative count: retry */
    }
}
void handleConsoleWrite (int chosenSpu) { unsigned int char_val, ack; if (spe_out_mbox_read(global_spu_data->spus[chosenSpu].ctx, &(char_val), 1) < 0) { fprintf(stderr, "Error reading console write message\n"); exit(1); } sysConsoleWriteChar(char_val); // send back ACK ack = ACK; if (spe_in_mbox_write(global_spu_data->spus[chosenSpu].ctx, &(ack), 1, SPE_MBOX_ANY_NONBLOCKING) < 0) { fprintf(stderr, "Error writing ack for console write message\n"); exit(1); } }
void handleConsoleLongWrite(int chosenSpu, int cmdSignal) { unsigned int long_val[2], ack; if (spe_out_mbox_read(global_spu_data->spus[chosenSpu].ctx, long_val, 2) < 0) { fprintf(stderr, "Error reading console write int message\n"); exit(1); } sysConsoleWriteLong(*((long long *)long_val), cmdSignal - CONSOLE_WRITE_INT); // send back ACK ack = ACK; if (spe_in_mbox_write(global_spu_data->spus[chosenSpu].ctx, &(ack), 1, SPE_MBOX_ANY_NONBLOCKING) < 0) { fprintf(stderr, "Error writing ack for console write int message\n"); exit(1); } }
void handleConsoleDoubleWrite(int chosenSpu) { unsigned int double_val[3], ack; if (spe_out_mbox_read(global_spu_data->spus[chosenSpu].ctx, double_val, 3) < 0) { fprintf(stderr, "Error reading console write int message\n"); exit(1); } sysConsoleWriteDouble(*((double *)double_val), (int) double_val[2]); // send back ACK ack = ACK; if (spe_in_mbox_write(global_spu_data->spus[chosenSpu].ctx, &(ack), 1, SPE_MBOX_ANY_NONBLOCKING) < 0) { fprintf(stderr, "Error writing ack for console write int message\n"); exit(1); } }
/*
 * Timed run of a shader taking one array and one scalar.
 *
 * Publishes the operation in slot 0 of every SPE thread, sends state word
 * 1 to each, and reads one reply word from every SPE in index order.
 * Afterwards prints the first three floats of block 0 of pyobj1.
 *
 * Returns the elapsed time reported by Toc_d().
 */
double SSCAL( PyArrayObject *pyobj1, PyArrayObject *pyscalar1, unsigned int shadersize, unsigned int *shader )
{
    Tic();

    Operation_t op;
    op.shaderSize = shadersize;
    op.EA_shader = shader;
    op.obj[0] = pyobj1;
    op.scalar[0] = pyscalar1;
    op.num_SPES = speThreads;

    unsigned int command = 1;
    unsigned int reply;
    unsigned int k;

    for ( k = 0 ; k < speThreads ; k++ ) {
        spe_pointer_addr[k][0] = (unsigned int)&op;
        spe_in_mbox_write( speData[k].spe_ctx, &command, 1, SPE_MBOX_ALL_BLOCKING );
    }

    /* Wait for one reply word per SPE, in index order */
    unsigned int done;
    for ( done = 0 ; done < speThreads ; ) {
        if ( !spe_out_mbox_status( speData[done].spe_ctx ) ) {
            continue;
        }
        spe_out_mbox_read( speData[done].spe_ctx, &reply, 1 );
        done++;
    }

    double elapsed = Toc_d();
    PrintTicToc( "Finished at ", Toc() );

    /* Show the first three elements of the first block */
    for ( k = 0 ; k < 3 ; k++ ) {
        printf( "%u=%f\n", k, ((float*)pyobj1->blockData[0])[k] );
    }

    return elapsed;
}
/*
 * Three-array variant, "no run".
 *
 * Fills an operation with the three arrays and the shader, stores its
 * address in slot `shaderindex` of the first SPE (the SPE count is fixed
 * at 1 here), sends state word (200 + shaderindex), and polls that SPE's
 * outbound mailbox until one word has been read.
 */
void run30nr( PyArrayObject *pyobj1, PyArrayObject *pyobj2, PyArrayObject *pyobj3, unsigned int shadersize, unsigned int *shader, unsigned int shaderindex )
{
    Operation_t op;
    unsigned int command = 200 + shaderindex;
    unsigned int reply;
    unsigned int spe;
    unsigned int done;

    op.shaderSize = shadersize;
    op.EA_shader = shader;
    op.num_SPES = 1;
    op.obj[0] = pyobj1;
    op.obj[1] = pyobj2;
    op.obj[2] = pyobj3;

    for ( spe = 0 ; spe < op.num_SPES ; spe++ ) {
        spe_pointer_addr[spe][shaderindex] = (unsigned int)&op;
        spe_in_mbox_write( speData[spe].spe_ctx, &command, 1, SPE_MBOX_ALL_BLOCKING );
    }

    /* Wait for one reply word per SPE, in index order */
    for ( done = 0 ; done < op.num_SPES ; ) {
        if ( !spe_out_mbox_status( speData[done].spe_ctx ) ) {
            continue;
        }
        spe_out_mbox_read( speData[done].spe_ctx, &reply, 1 );
        done++;
    }
}
/*
 * Tell the SPU runtime for procID which processor object it is bound to.
 *
 * Sends SET_PROCESSOR_REG followed by procObj to the SPU's inbound mailbox,
 * then blocks on the interrupting outbound mailbox for one reply word. If
 * the reply is not ACK, one further word is read from the ordinary
 * outbound mailbox and reported as the error number. Every failure path
 * exits with status 1; on success the function returns 0.
 */
extern "C" int sysVirtualSubArchProcessorBind(VM_Address procObj, int procID)
{
    unsigned int words[2];

    // TODO - do some checking here
    SpuData spu = global_spu_data->spus[procID];

    words[0] = SET_PROCESSOR_REG;
    words[1] = (unsigned int) procObj;

    // tell spu runtime its processor object
    if (spe_in_mbox_write(spu.ctx, words, 2, SPE_MBOX_ANY_NONBLOCKING) < 0) {
        perror("Failed while trying to signal method details to Cell SPU");
        exit(1);
    }

    // wait for ACK from SPU
    if (spe_out_intr_mbox_read(spu.ctx, words, 1, SPE_MBOX_ALL_BLOCKING) < 0) {
        perror("Faied reading SPU mailbox while awaiting SPU boot");
        exit(1);
    }

    if (words[0] == ACK) {
        return 0;
    }

    spe_out_mbox_read(spu.ctx, words, 1);
    fprintf(stderr, "SPU did not ACK setProcessor, returned error no. 0x%x\n", words[0]);
    exit(1);

    return 0;
}
/*
 * Run a shader over two arrays (A, x) and two scalars (alpha, j).
 *
 * The number of SPEs comes from _GetNumberOfSPES(A->numberOfBlocks). For
 * each of them the operation's address is stored in slot 0 of
 * spe_pointer_addr and state word 0 is sent; the function then polls each
 * SPE's outbound mailbox in index order until one word has been read.
 */
void run22( PyArrayObject *A, PyArrayObject *x, PyArrayObject *alpha, PyArrayObject *j, unsigned int shadersize, unsigned int *shader )
{
    Operation_t op;
    unsigned int command = 0;
    unsigned int reply;
    unsigned int spe;
    unsigned int done;

    op.shaderSize = shadersize;
    op.EA_shader = shader;

    /* Determining the number of SPEs */
    op.num_SPES = _GetNumberOfSPES( A->numberOfBlocks );

    op.obj[0] = A;
    op.obj[1] = x;
    op.scalar[0] = alpha;
    op.scalar[1] = j;

    for ( spe = 0 ; spe < op.num_SPES ; spe++ ) {
        spe_pointer_addr[spe][0] = (unsigned int)&op;
        spe_in_mbox_write( speData[spe].spe_ctx, &command, 1, SPE_MBOX_ALL_BLOCKING );
    }

    /* Wait for one reply word per SPE, in index order */
    for ( done = 0 ; done < op.num_SPES ; ) {
        if ( !spe_out_mbox_status( speData[done].spe_ctx ) ) {
            continue;
        }
        spe_out_mbox_read( speData[done].spe_ctx, &reply, 1 );
        done++;
    }
}
/*
 * Run the randomfiller shader on every SPE thread with `a` as object 0.
 *
 * Every SPE is sent state word 1, then the function polls each SPE's
 * outbound mailbox in index order until one word has been read from each.
 */
void internal_random(PyArrayObject *a)
{
    /* Describe the operation for the SPEs */
    Operation_t op;
    op.shaderSize = randomfiller_size;
    op.EA_shader = randomfiller;
    op.obj[0] = a;
    op.num_SPES = speThreads;

    /* Publish the operation's address in slot 0 of every SPE */
    unsigned int spe;
    for (spe = 0; spe < speThreads; spe++) {
        spe_pointer_addr[spe][0] = &op;
    }

    /* Send the state word to every SPE */
    unsigned int command = 1;
    for (spe = 0; spe < speThreads; spe++) {
        spe_in_mbox_write(speData[spe].spe_ctx, &command, 1, SPE_MBOX_ALL_BLOCKING);
    }

    /* Wait for one reply word per SPE, in index order */
    unsigned int done = 0;
    unsigned int reply;
    while (done < speThreads) {
        if (!spe_out_mbox_status(speData[done].spe_ctx)) {
            continue; /* nothing posted yet: poll the same SPE again */
        }
        spe_out_mbox_read(speData[done].spe_ctx, &reply, 1);
        done++;
    }
}
extern "C" int supportSPU(SpuThreadData * spu_data, SpuJavaThreadData * thread, int chosenSpu) { unsigned int cmdSignal; char stop = 0; while (!stop) { if (spe_out_intr_mbox_read(spu_data->spus[chosenSpu].ctx, &cmdSignal, 1, SPE_MBOX_ALL_BLOCKING) < 0) { perror("Failed reading while waiting for a command signal from SPU"); exit(1); } switch (cmdSignal) { case TRAP_MESSAGE: handleTrap(chosenSpu); break; case FAKE_TRAP_MESSAGE: printf("Fake Trap Called from SPU\n"); break; case FAKE_TRAP_MESSAGE_STR: handleFakeTrap(chosenSpu); break; case FAKE_TRAP_MESSAGE_INT: handleFakeTrapInt(chosenSpu); break; case CONSOLE_WRITE_CHAR: handleConsoleWrite(chosenSpu); break; case CONSOLE_WRITE_INT: case CONSOLE_WRITE_INT_BOTH: case CONSOLE_WRITE_INT_HEX: handleConsoleIntWrite(chosenSpu, cmdSignal); break; case CONSOLE_WRITE_LONG: case CONSOLE_WRITE_LONG_BOTH: case CONSOLE_WRITE_LONG_HEX: handleConsoleLongWrite(chosenSpu, cmdSignal); break; case CONSOLE_WRITE_DOUBLE: handleConsoleDoubleWrite(chosenSpu); break; case RETURN_VALUE_V: stop = 1; break; case RETURN_VALUE_I: case RETURN_VALUE_F: case RETURN_VALUE_R: if (spe_out_mbox_read(global_spu_data->spus[chosenSpu].ctx, &(thread->retVal[0]), 1) < 0) { perror("Error reading return value from SPU migrated method\n"); exit(1); } stop = 1; break; case RETURN_VALUE_L_UPPER: case RETURN_VALUE_D_UPPER: if (spe_out_mbox_read(global_spu_data->spus[chosenSpu].ctx, &(thread->retVal[0]), 1) < 0) { perror("Error reading return value from SPU migrated method\n"); exit(1); } break; case RETURN_VALUE_L_LOWER: case RETURN_VALUE_D_LOWER: if (spe_out_mbox_read(global_spu_data->spus[chosenSpu].ctx, &(thread->retVal[1]), 1) < 0) { perror("Error reading return value from SPU migrated method\n"); exit(1); } stop = 1; break; default: { unsigned int err = 0; fprintf(stderr, "Unknown signal recieved from SPU: 0x%x", cmdSignal); spe_out_mbox_read(spu_data->spus[chosenSpu].ctx, &err, 1); fprintf(stderr, "Error signal was: 0x%x\n", err); exit(1); } } } return 0; }
/*
 * Two-phase run of a two-array shader; prints the sum of the per-SPE
 * results and returns the time of the second phase.
 *
 * Phase 1 (setup): for each SPE thread a 128-byte-aligned result buffer is
 * allocated, the operation's address is published in slot 0, and state
 * word 4 plus the buffer address are sent. One reply word is then read
 * from every SPE.
 * Phase 2 (run, timed from Tic()): state word 6 is sent to every SPE and
 * one reply word is read from each. The first float of every buffer is
 * summed and the buffers are freed.
 *
 * Returns the elapsed time reported by Toc_d().
 *
 * NOTE(review): the buffer table has 6 entries while the loops run to
 * speThreads, and the memalign() result is not checked.
 */
double SDOT2( PyArrayObject *pyobj1, PyArrayObject *pyobj2, unsigned int shadersize, unsigned int *shader )
{
    Operation_t op;
    op.shaderSize = shadersize;
    op.EA_shader = shader;
    op.obj[0] = pyobj1;
    op.obj[1] = pyobj2;
    op.num_SPES = speThreads;

    unsigned int command = 4;
    unsigned int spe, word;
    unsigned int done = 0;
    float *partial[6];

    /* Setup SPEs */
    for ( spe = 0 ; spe < speThreads ; spe++ ) {
        partial[spe] = (float *)memalign( 128, ( 4 + 127 ) & ~127 );
        word = (unsigned int)partial[spe];
        spe_pointer_addr[spe][0] = (unsigned int)&op;
        spe_in_mbox_write( speData[spe].spe_ctx, &command, 1, SPE_MBOX_ALL_BLOCKING );
        spe_in_mbox_write( speData[spe].spe_ctx, &word, 1, SPE_MBOX_ALL_BLOCKING );
    }

    /* Wait for the setup replies */
    while ( done < speThreads ) {
        if ( !spe_out_mbox_status( speData[done].spe_ctx ) ) {
            continue;
        }
        spe_out_mbox_read( speData[done].spe_ctx, &word, 1 );
        done++;
    }

    Tic();

    /* Make SPEs run */
    command = 6;
    for ( spe = 0 ; spe < speThreads ; spe++ ) {
        spe_in_mbox_write( speData[spe].spe_ctx, &command, 1, SPE_MBOX_ALL_BLOCKING );
    }

    /* Wait for the run replies */
    for ( done = 0 ; done < speThreads ; ) {
        if ( !spe_out_mbox_status( speData[done].spe_ctx ) ) {
            continue;
        }
        spe_out_mbox_read( speData[done].spe_ctx, &word, 1 );
        done++;
    }

    /* Get result and release the buffers */
    float total = 0.0f;
    for ( spe = 0 ; spe < speThreads ; spe++ ) {
        total += partial[spe][0];
        free( partial[spe] );
    }

    double elapsed = Toc_d();
    PrintTicToc( "Finished at ", Toc() );
    printf( "Result=%f\n", total );

    return elapsed;
}
/*
 * Prepare SPU chosenSpu for migration of a thread.
 *
 * Calls reloadJtoc(), loadTocTables() and waitForProxyload(), then sends
 * LOAD_STATIC_METHOD with the two method offsets and waits for an ACK.
 * After that each parameter word is sent with LOAD_WORD_PARAM, starting
 * from the last element of paramsStart and working back to the first, with
 * an ACK awaited after every one.
 *
 * A negative result from a mailbox write or interrupt-mailbox read, or a
 * reply other than ACK, terminates the process with status 1.
 */
void prepareMigration(SpuThreadData * spu_data, int chosenSpu, int methodClassTocOffset, int methodSubArchOffset, VM_Address paramsStart, int paramsLength)
{
    unsigned int * paramWords = (unsigned int *) paramsStart;
    unsigned int loadMethod[3];
    unsigned int loadParam[2];
    unsigned int reply;
    int idx;

    reloadJtoc(spu_data, chosenSpu);
    loadTocTables(spu_data, chosenSpu);

    // TODO - see if we can defer this waiting (note offset required for method load)
    waitForProxyload(spu_data, chosenSpu);

    loadMethod[0] = LOAD_STATIC_METHOD;
    loadMethod[1] = (unsigned int) methodClassTocOffset;
    loadMethod[2] = (unsigned int) methodSubArchOffset;

    if (spe_in_mbox_write(spu_data->spus[chosenSpu].ctx, loadMethod, 3, SPE_MBOX_ANY_NONBLOCKING) < 0) {
        perror("Failed while trying to signal method details to Cell SPU");
        exit(1);
    }

    // wait for ACK from SPU
    if (spe_out_intr_mbox_read(spu_data->spus[chosenSpu].ctx, &reply, 1, SPE_MBOX_ALL_BLOCKING) < 0) {
        perror("Failed reading SPU mailbox while awaiting SPU boot");
        exit(1);
    }
    if (reply != ACK) {
        spe_out_mbox_read(spu_data->spus[chosenSpu].ctx, &reply, 1);
        fprintf(stderr, "SPU did not ACK method load signal, returned error no. 0x%x\n", reply);
        exit(1);
    }

    // load params, last one first
    for (idx = paramsLength - 1; idx >= 0; idx--) {
        // TODO - Double params
        loadParam[0] = LOAD_WORD_PARAM;
        loadParam[1] = paramWords[idx];

        if (spe_in_mbox_write(spu_data->spus[chosenSpu].ctx, loadParam, 2, SPE_MBOX_ANY_NONBLOCKING) < 0) {
            perror("Failed while trying to signal method details to Cell SPU");
            exit(1);
        }

        // wait for ACK from SPU
        if (spe_out_intr_mbox_read(spu_data->spus[chosenSpu].ctx, &reply, 1, SPE_MBOX_ALL_BLOCKING) < 0) {
            perror("Failed reading SPU mailbox while awaiting SPU boot");
            exit(1);
        }
        if (reply != ACK) {
            spe_out_mbox_read(spu_data->spus[chosenSpu].ctx, &reply, 1);
            fprintf(stderr, "SPU did not ACK method para load signal, returned error no. 0x%x\n", reply);
            exit(1);
        }
    }
}
int main(int argc, char** argv) { double begin; double end; int errnum; size_t nthread = P; size_t i; size_t nvertex; unsigned int x; // sent to each SPU int code; // status; unsigned int reply; // from SPU arg_t data[nthread]; param_t param[nthread] A16; argc = argc; // to silence gcc... progname = argv[0]; nvertex = atoi(argv[2]); printf("nthread = %zu\n", nthread); printf("nvertex = %zu\n", nvertex); printf("ctx = %zu\n", sizeof(param_t)); printf("arg = %zu\n", sizeof(arg_t)); begin = sec(); for (i = 0; i < nthread; ++i) { param[i].proc = i; param[i].nvertex = nvertex; if ((data[i].ctx = spe_context_create (0, NULL)) == NULL) { perror ("Failed creating context"); exit(1); } if (spe_program_load (data[i].ctx, &dataflow)) { perror ("Failed loading program"); exit(1); } data[i].arg = ¶m[i]; printf("i=%d param=%p\n", i, data[i].arg); if (pthread_create (&data[i].pthread, NULL, work, &data[i])) { perror ("Failed creating thread"); exit(1); } } // send some data to each SPU and wait for a reply. x = 42; for (i = 0; i < nthread; ++i) { reply = 0; code = spe_out_mbox_read(data[i].ctx, &reply, 1); printf("spu-%d reply-0: %u\tcode: %d\n",i, reply, code); code = spe_in_mbox_write(data[i].ctx, &x, 1, 1); code = spe_out_mbox_read(data[i].ctx, &reply, 1); printf("spu-%d reply-1: %u\tcode: %d\n",i, reply, code); code = spe_out_mbox_read(data[i].ctx, &reply, 1); printf("spu-%d reply-2: %u\tcode: %d\n",i, reply, code); } end = sec(); printf("%1.3lf s\n", end-begin); for (i = 0; i < nthread; ++i) { printf("joining with PPU pthread %zu...\n", i); errnum = pthread_join(data[i].pthread, NULL); if (errnum != 0) syserror(errnum, "pthread_join failed"); if (spe_context_destroy (data[i].ctx) != 0) { perror("Failed destroying context"); exit(1); } } return 0; }
unsigned int internal_sum(long a) { //printf("Inside internal add\n"); //Setting the Operation object Operation_t op1; op1.shaderSize = arraysum_size; op1.EA_shader = arraysum; op1.obj[0] = a; //printf("arraymultiply: %d\n",arraydivide); //printf("&arraymultiply: %d\n",&arraydivide); op1.num_SPES = speThreads; unsigned int i = 0; for(i = 0;i<speThreads;i++) { spe_pointer_addr[i][0] = &op1; } //printf("Adr of op1 sat. It is: %x\n",&op1); unsigned int state = 2; unsigned int y; for ( y = 0 ; y < speThreads ; y++ ) { //printf("Sending state to disp\n"); spe_in_mbox_write ( speData[y].spe_ctx, &state, 1, SPE_MBOX_ALL_BLOCKING ); //printf("TO THE DISP. THE SUM STATE: %d, has just been sent\n",&state); //printf("This is the actual value: %d\n",state); } //printf("ADD Waiting for the SPE's\n");//ALSO, check that the structure used is correct! // Waiting for SPEs! //printf( "Waiting for SPEs\n" ); unsigned int checked = 0; unsigned int signal; //float r; //printf("1. r[0]: %f\n",resultFloats[0][0]); //printf("1. adr is: %x\n",&(resultFloats[0][0])); unsigned int pointersToResultFloats[speThreads]; unsigned int e; for(e=0;e<speThreads;e++) { pointersToResultFloats[e] = &(resultFloats[e][0]); } for ( y = 0 ; y < speThreads ; y++ ) { //printf("Sending res pointers to disp\n"); spe_in_mbox_write ( speData[y].spe_ctx, &pointersToResultFloats[y], 1, SPE_MBOX_ALL_BLOCKING ); //printf("TO THE DISP. THE SUM STATE: %d, has just been sent\n",&state); //printf("This is the actual value: %d\n",state); } float finalRes = 0; while( checked < speThreads ) { if ( spe_out_mbox_status( speData[checked].spe_ctx ) ) { spe_out_mbox_read( speData[checked].spe_ctx, &signal, 1 ); checked++; //printf("Something read on the inbox\n"); } } //printf("Done waiting on threads to finish in DIV\n"); //printf("2. r[0]: %f\n",resultFloats[y][0]); //Add all the results together unsigned int u = 0; for(u=0;u<speThreads;u++) { finalRes = finalRes+resultFloats[u][0]; } return finalRes; }