void *ppu_pthread_function(void *thread_arg) { spe_context_ptr_t ctx; struct package_t *arg = (struct package_t *) thread_arg; /* Create SPE context */ if ((ctx = spe_context_create (0, NULL)) == NULL) { perror ("Failed creating context"); exit (1); } /* Load SPE program into context */ if (spe_program_load (ctx, &lab8_spu)) { perror ("Failed loading program"); exit (1); } /* Run SPE context */ unsigned int entry = SPE_DEFAULT_ENTRY; /* transferul adresei structurii initiale */ if (spe_context_run(ctx, &entry, 0, (void *)arg, (void *)sizeof(struct package_t), NULL) < 0) { perror ("Failed running context"); exit (1); } /* Destroy context */ if (spe_context_destroy (ctx) != 0) { perror("Failed destroying context"); exit (1); } pthread_exit(NULL); }
int main() { spe_context_ptr_t ctx; unsigned int entry = SPE_DEFAULT_ENTRY; init_matrix(); if((ctx = spe_context_create(0, NULL)) == NULL) { perror ("Failed creating context"); exit (1); } if(spe_program_load(ctx, &lab10_spu)) { perror ("Failed loading program"); exit (1); } printf("SPU:\n"); if(spe_context_run(ctx, &entry, 0, (void*)&v, (void*)M, NULL) < 0) { perror ("Failed running context"); exit (1); } if(spe_context_destroy(ctx) != 0) { perror("Failed destroying context"); exit (1); } printf("PPU:\n"); printf("received: %d %d \n", v[0][0].x, v[0][0].y); printf("correct: %d\n", (destination.x == v[0][0].x) && (destination.y == v[0][0].y)); return 0; }
/**
 * Create an SPE context, load a program into it and start a PPU thread
 * that will drive it.
 *
 * @param spedata  Per-thread record; its spe_ctx, entry, argp and pthread
 *                 members are filled in here and the record itself is
 *                 passed to PpuPthreadFunction as the thread argument.
 * @param context  Handle of the SPE program to load (a program handle,
 *                 despite the parameter name).
 * @param myarg    Value stored in spedata->argp.
 * @return 1 on success, -1 on failure. On failure no SPE context is left
 *         behind: a context created here is destroyed again and
 *         spedata->spe_ctx is reset to NULL.
 */
int CreateSPEThread( PpuPthreadData_t *spedata, spe_program_handle_t *context, void *myarg )
{
    // create SPE context
    spedata->spe_ctx = spe_context_create ( 0, NULL );
    if ( spedata->spe_ctx == NULL ) {
        perror ( "Failed creating context" );
        return -1;
    }

    // Load program into context
    if ( spe_program_load ( spedata->spe_ctx, context ) ) {
        perror ( "Failed loading program" );
        goto fail_destroy;
    }

    // Initialize context run data
    spedata->entry = SPE_DEFAULT_ENTRY;
    spedata->argp = myarg;

    // Create the pthread that runs this SPE context
    if ( pthread_create ( &spedata->pthread, NULL, &PpuPthreadFunction, spedata ) ) {
        perror ( "Failed creating thread" );
        goto fail_destroy;
    }

    return 1;

fail_destroy:
    // Release the context so that a failed call does not leak it.
    spe_context_destroy ( spedata->spe_ctx );
    spedata->spe_ctx = NULL;
    return -1;
}
int main() { spe_context_ptr_t speid; unsigned int flags = 0; unsigned int entry = SPE_DEFAULT_ENTRY; void *argp = NULL; void *envp = NULL; spe_stop_info_t stop_info; int rc; speid = spe_context_create(0,NULL); if (speid==NULL) { perror("spe_context_create"); return -1; } //Load SPE executable object into the SPE context local store if (spe_program_load(speid, &hello_spu)) { perror("spe_program_load"); return -2; } //Run the SPE context rc = spe_context_run(speid, &entry, flags, argp, envp, &stop_info); if (rc<0) perror("spe_context_run"); //Destroy the SPE context spe_context_destroy(speid); return 0; }
/**
 * Create and start one thread per SPE.
 *
 * For every index below G->nprocs this fills in the thread's environment
 * record, creates an SPE context, loads fixedgrid_spu into it and starts a
 * pthread running spe_pthread_function on that thread record. Any failure
 * prints a message to stderr and terminates the process.
 *
 * @param G Grid state; G->nprocs is the number of threads to start and
 *          G->threads[] receives the contexts and thread handles.
 */
void create_spe_pthreads(fixedgrid_t* G)
{
    uint32_t i;
    int rc;

    for(i=0; i<G->nprocs; i++)
    {
        /* Configure environment */
        G->threads[i].envv.speid = i;
        G->threads[i].envv.nprocs = G->nprocs;
        G->threads[i].envv.metptr = (uint32_t)(&G->threads[i].metrics);

        /* Create context */
        if((G->threads[i].speid = spe_context_create(0, NULL)) == NULL)
        {
            fprintf(stderr, "Failed spe_context_create (errno=%d)\n", errno);
            exit(1);
        }

        /* Load program into context */
        if(spe_program_load(G->threads[i].speid, &fixedgrid_spu))
        {
            fprintf(stderr, "Failed spe_program_load (errno=%d)\n", errno);
            exit(1);
        }

        /*
         * Set the initial status before the thread exists: the new thread
         * receives &G->threads[i] and must never observe (or have
         * overwritten) an uninitialized status.
         */
        G->threads[i].status = SPE_STATUS_INIT;

        /*
         * Create thread for each SPE context. pthread_create() reports
         * failure through its return value, not through errno.
         */
        rc = pthread_create(&G->threads[i].pthread, NULL, &spe_pthread_function, &G->threads[i]);
        if(rc != 0)
        {
            fprintf(stderr, "Failed pthread_create (errno=%d)\n", rc);
            exit(1);
        }
    }
}
int main(int argc, char **argv) { int i; int ret; spe_context_ptr_t spe; spe_program_handle_t *prog; unsigned int entry; spe_stop_info_t stop_info; prog = spe_image_open("vec_abs_spe.elf"); if (!prog) { perror("spe_image_open"); exit(1); } spe = spe_context_create(0, NULL); if (!spe) { perror("spe_context_create"); exit(1); } ret = spe_program_load(spe, prog); if (ret) { perror("spe_program_load"); exit(1); } abs_params.ea_in = (unsigned long) in; abs_params.ea_out = (unsigned long) out; abs_params.size = SIZE; entry = SPE_DEFAULT_ENTRY; ret = spe_context_run(spe, &entry, 0, &abs_params, NULL, &stop_info); if (ret < 0) { perror("spe_context_run"); exit(1); } ret = spe_context_destroy(spe); if (ret) { perror("spe_context_destroy"); exit(1); } ret = spe_image_close(prog); if (ret) { perror("spe_image_close"); exit(1); } for (i = 0; i < SIZE; i++) { printf("out[%02d]=%0.0f\n", i, out[i]); } return 0; }
int main(int argc, char **argv) { int i; int ret; spe_context_ptr_t spe[NUM_SPE]; spe_program_handle_t *prog; pthread_t thread[NUM_SPE]; prog = spe_image_open("increment_spe.elf"); if (!prog) { perror("spe_image_open"); exit(1); } for (i = 0; i < NUM_SPE; i++) { spe[i] = spe_context_create(0, NULL); if (!spe) { perror("spe_context_create"); exit(1); } ret = spe_program_load(spe[i], prog); if (ret) { perror("spe_program_load"); exit(1); } } for (i = 0; i < NUM_SPE; i++) { ret = pthread_create(&thread[i], NULL, run_increment_spe, &spe[i]); if (ret) { perror("pthread_create"); exit(1); } } for (i = 0; i < NUM_SPE; i++) { pthread_join(thread[i], NULL); ret = spe_context_destroy(spe[i]); if (ret < 0) { perror("spe_context_destroy"); exit(1); } } ret = spe_image_close(prog); if (ret) { perror("spe_image_close"); exit(1); } printf("result=%d\n", counter[0]); return 0; }
/** * Create the SPU threads. This is done once during driver initialization. * This involves setting the "init" message which is sent to each SPU. * The init message specifies an SPU id, total number of SPUs, location * and number of batch buffers, etc. */ void cell_start_spus(struct cell_context *cell) { static boolean one_time_init = FALSE; uint i, j; uint timebase = get_timebase(); if (one_time_init) { fprintf(stderr, "PPU: Multiple rendering contexts not yet supported " "on Cell.\n"); abort(); } one_time_init = TRUE; assert(cell->num_spus <= CELL_MAX_SPUS); ASSERT_ALIGN16(&cell_global.inits[0]); ASSERT_ALIGN16(&cell_global.inits[1]); /* * Initialize the global 'inits' structure for each SPU. * A pointer to the init struct will be passed to each SPU. * The SPUs will then each grab their init info with mfc_get(). */ for (i = 0; i < cell->num_spus; i++) { cell_global.inits[i].id = i; cell_global.inits[i].num_spus = cell->num_spus; cell_global.inits[i].debug_flags = cell->debug_flags; cell_global.inits[i].inv_timebase = 1000.0f / timebase; for (j = 0; j < CELL_NUM_BUFFERS; j++) { cell_global.inits[i].buffers[j] = cell->buffer[j]; } cell_global.inits[i].buffer_status = &cell->buffer_status[0][0][0]; cell_global.inits[i].spu_functions = &cell->spu_functions; cell_global.spe_contexts[i] = spe_context_create(0, NULL); if (!cell_global.spe_contexts[i]) { fprintf(stderr, "spe_context_create() failed\n"); exit(1); } if (spe_program_load(cell_global.spe_contexts[i], &g3d_spu)) { fprintf(stderr, "spe_program_load() failed\n"); exit(1); } pthread_create(&cell_global.spe_threads[i], /* returned thread handle */ NULL, /* pthread attribs */ &cell_thread_function, /* start routine */ &cell_global.inits[i]); /* thread argument */ } }
int main() { int i, spu_threads; spe_context_ptr_t ctxs[MAX_SPU_THREADS]; pthread_t threads[MAX_SPU_THREADS]; /* * Determine the number of SPE threads to create. */ spu_threads = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1); if (spu_threads > MAX_SPU_THREADS) spu_threads = MAX_SPU_THREADS; /* * Create several SPE-threads to execute 'simple_spu'. */ for(i=0; i<spu_threads; i++) { /* Create context */ if ((ctxs[i] = spe_context_create (0, NULL)) == NULL) { perror ("Failed creating context"); exit (1); } /* Load program into context */ if (spe_program_load (ctxs[i], &simple_spu)) { perror ("Failed loading program"); exit (1); } /* Create thread for each SPE context */ if (pthread_create (&threads[i], NULL, &ppu_pthread_function, &ctxs[i])) { perror ("Failed creating thread"); exit (1); } } /* Wait for SPU-thread to complete execution. */ for (i=0; i<spu_threads; i++) { if (pthread_join (threads[i], NULL)) { perror("Failed pthread_join"); exit (1); } /* Destroy context */ if (spe_context_destroy (ctxs[i]) != 0) { perror("Failed destroying context"); exit (1); } } printf("\nThe program has successfully executed.\n"); return 0; }
/*
 * Thread entry point: loads coremaker_spu into the SPE context whose
 * address is passed in arg and runs it with no arguments.
 *
 * arg: pointer to a spe_context_ptr_t owned by the caller (not destroyed
 *      here).
 * Failures are reported on stderr; the program is not run if it could not
 * be loaded. The thread always ends through pthread_exit(NULL).
 */
void *
spe_thread (void * arg)
{
  int flags = 0;
  unsigned int entry = SPE_DEFAULT_ENTRY;
  spe_context_ptr_t *ctx = (spe_context_ptr_t *) arg;

  if (spe_program_load (*ctx, &coremaker_spu) != 0)
    perror ("spe_program_load");
  else if (spe_context_run (*ctx, &entry, flags, NULL, NULL, NULL) < 0)
    perror ("spe_context_run");

  pthread_exit (NULL);
}
/*
 * Runs print_param_spe.elf once on a single SPE context, passing the
 * integer value of param (not an address) in the argp slot. Any libspe2
 * failure prints the failing call and exits with status 1.
 */
int main(int argc, char **argv)
{
    int ret;
    spe_context_ptr_t spe;
    spe_program_handle_t *prog;
    unsigned int entry;
    spe_stop_info_t stop_info;
    unsigned long param;

    prog = spe_image_open("print_param_spe.elf");
    if (!prog) {
        perror("spe_image_open");
        exit(1);
    }

    spe = spe_context_create(0, NULL);
    if (!spe) {
        perror("spe_context_create");
        exit(1);
    }

    ret = spe_program_load(spe, prog);
    if (ret) {
        perror("spe_program_load");
        exit(1);
    }

    param = 12345678;
    /* param is unsigned long, so the matching conversion is %lu. */
    printf("[PPE] param=%lu\n", param);

    entry = SPE_DEFAULT_ENTRY;
    /* The value itself travels in the pointer-sized argp argument. */
    ret = spe_context_run(spe, &entry, 0, (void *) param, NULL, &stop_info);
    if (ret < 0) {
        perror("spe_context_run");
        exit(1);
    }

    ret = spe_context_destroy(spe);
    if (ret) {
        perror("spe_context_destroy");
        exit(1);
    }

    ret = spe_image_close(prog);
    if (ret) {
        perror("spe_image_close");
        exit(1);
    }

    return 0;
}
/*
 * Creates a fresh SPE context, loads bt2_spu into it, runs it with no
 * arguments and releases the context again.
 *
 * base, offset: not used by this function.
 * Returns 0 in all cases, including when the context cannot be created or
 * the program fails to load or run.
 */
int
indirect_handler (unsigned char *base, unsigned long offset)
{
  int flags = 0;
  unsigned int entry = SPE_DEFAULT_ENTRY;
  spe_context_ptr_t ctx = spe_context_create (0, NULL);

  (void) base;
  (void) offset;

  /* Without a context there is nothing to load, run or release.  */
  if (ctx == NULL)
    return 0;

  if (spe_program_load (ctx, &bt2_spu) == 0)
    spe_context_run (ctx, &entry, flags, NULL, NULL, NULL);

  /* Every call creates its own context, so release it before returning;
     otherwise each invocation leaks one.  */
  spe_context_destroy (ctx);

  return 0;
}
/*
 * Start the Spu threads.
 *
 * Creates a gang context plus one SPE context and one bootstrap pthread
 * per SPU, each context loaded with cellspu_bootloader.
 *
 * spu_threads: requested number of SPU threads; a negative value means
 *              "use every usable SPE". The request is capped at the number
 *              of usable SPEs, with a warning when it is reduced.
 * spu_data:    receives the thread count, the per-SPU array (allocated
 *              here with malloc) and the gang context.
 *
 * Any failure prints a diagnostic and terminates the process.
 */
void startSpuThreads(int spu_threads, SpuThreadData * spu_data) {
    int i, no_spus;

    /* Determine the number of SPE threads to create */
    no_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1);
    if (spu_threads < 0) {
        spu_threads = no_spus;
    } else if (no_spus < spu_threads) {
        spu_threads = no_spus;
        printf("Warning: Only %i Cell SPU processors available\n", spu_threads);
    }

    /*
     * spe_cpu_info_get() reports errors with a negative value; never let
     * that turn into a negative element count for the allocation below.
     */
    if (spu_threads < 0) {
        spu_threads = 0;
    }

    spu_data->no_spu_threads = spu_threads;
    spu_data->spus = (SpuData *) malloc(sizeof(SpuData) * spu_threads);
    /* malloc(0) may legitimately return NULL, so only a non-empty request
     * counts as a failure. Continuing would dereference NULL below. */
    if (spu_data->spus == NULL && spu_threads > 0) {
        perror("Failed to allocate SPU data for threads");
        exit(1);
    }

    printf("Bringing up %i Cell SPU threads\n", spu_threads);

    /* create the context gang */
    if ((spu_data->gang = spe_gang_context_create(0)) == NULL) {
        perror("Failed creating Cell SPU gang context");
        exit(1);
    }

    for (i = 0; i < spu_threads; i++) {
        /* Create context */
        if ((spu_data->spus[i].ctx = spe_context_create (CTX_FLAGS, spu_data->gang)) == NULL) {
            perror ("Failed creating Cell SPU context");
            exit (1);
        }

        /* load bootloader into spu's */
        if (spe_program_load (spu_data->spus[i].ctx, &cellspu_bootloader)) {
            perror ("Failed loading Cell SPU bootloader");
            exit (1);
        }

        /* create a thread for each SPU */
        if (pthread_create (&(spu_data->spus[i].boot_thread), NULL,
                            &spu_bootstrap_thread, &(spu_data->spus[i].ctx))) {
            perror ("Failed creating Cell SPU thread");
            exit (1);
        }
    }
}
/** Sends a SPECommand to the SPE.
 *
 * With MAIL defined the command address is written to the mailbox and the
 * reply read back. Otherwise the SPE program is run once per command on a
 * context that is created on first use and kept for later calls.
 *
 * \param command A SPEcommand.
 * \return Returns nonzero on error. In the non-MAIL build -1 is returned
 *         when the context cannot be created or the program cannot be
 *         loaded; otherwise the result of spe_context_run() is returned.
 */
static int submitSPECommand(SPECommand* command)
{
#ifdef MAIL
    /* Call the SPU Program*/
    writeMailBox( (ppu_addr_t)command );
    return readMailBox();
#else
    unsigned int entry = SPE_DEFAULT_ENTRY;

    if( !spe_context ){
        spe_context = spe_context_create( 0, NULL );
        if( !spe_context )
            return -1;

        if( spe_program_load( spe_context, &spe_dynprogr_handle ) ){
            /* Do not keep a context without a program: drop it so the
             * next call starts over instead of running an empty context. */
            spe_context_destroy( spe_context );
            spe_context = NULL;
            return -1;
        }
    }

    /* The context is intentionally not destroyed; it is reused. */
    return spe_context_run( spe_context, &entry, 0, command, NULL, NULL );
#endif
}
static void * sws_spe_thread(void * arg) { struct yuvscaler_s * arg_ptr; arg_ptr=(struct yuvscaler_s *) arg; // spe_program_handle_t * program; // // program = spe_image_open("spu_yuvscaler"); if (spe_program_load(arg_ptr->ctx, &spu_yuvscaler_handle) < 0) { perror("error loading program"); pthread_exit(NULL); } spe_context_run(arg_ptr->ctx, &arg_ptr->entry, arg_ptr->runflags,arg_ptr->argp,arg_ptr->envp, NULL); pthread_exit(NULL); }
/*
 * Return an SPE context that has `program` loaded, reusing a cached one
 * when possible.
 *
 * A single context is cached in function-static storage together with the
 * program it was loaded with and the pid that created it:
 *   - cached context created by a different pid: logged, then abort();
 *   - cached context holding a different program: destroyed (a failing
 *     destroy is only logged) and replaced;
 *   - no usable cached context: a new one is created and loaded; failure
 *     of either step is logged and followed by abort().
 */
spe_context_ptr_t ps3_assign_context_to_program(spe_program_handle_t *program)
{
    static spe_context_ptr_t cached_context;
    static spe_program_handle_t *cached_program;
    static int cached_pid;

    const int my_pid = getpid();
    const int cruncher = 99; /* Todo: get true cruncher index */
    int load_rc;

    if (cached_context != NULL) {
        if (my_pid != cached_pid) {
            Log("!!! FATAL !!! Cached SPE context forked from another pid (%d)\n", cached_pid);
            abort();
        }

        /* Same process, same program: the cached context is still good. */
        if (program == cached_program) {
            return cached_context;
        }

        /* The program changed, so the old context has to go. */
        if (spe_context_destroy(cached_context) != 0) {
            Log("Alert SPE%d! spe_context_destroy() failed, errno=%d\n", cruncher, errno);
        }
        cached_context = NULL;
    }

    cached_context = spe_context_create(0, NULL);
    if (cached_context == NULL) {
        Log("Alert SPE#%d! spe_context_create() failed\n", cruncher);
        abort();
    }

    load_rc = spe_program_load(cached_context, program);
    if (load_rc != 0) {
        Log("Alert SPE#%d: spe_program_load() returned %d\n", cruncher, load_rc);
        abort();
    }

    cached_program = program;
    cached_pid = my_pid;

    return cached_context;
}
/*
 * Thread entry point: creates an SPE context, loads spu_pi, runs it with
 * arg as argp and destroys the context.
 *
 * arg: pointer to the caller's `context` record, forwarded unchanged to
 *      the SPE program.
 * Every libspe2 failure is reported on stderr; the program is not run when
 * the context could not be created or the program could not be loaded.
 * The thread always ends through pthread_exit(NULL).
 */
void *pthread_run_spe(void *arg){
    spe_context_ptr_t spe_ctx;
    context *data = (context *)arg;
    unsigned int entry = SPE_DEFAULT_ENTRY;

    spe_ctx = spe_context_create(0, NULL);
    if (spe_ctx == NULL) {
        perror("spe_context_create");
        pthread_exit(NULL);
    }

    if (spe_program_load(spe_ctx, &spu_pi) != 0) {
        perror("spe_program_load");
    } else if (spe_context_run(spe_ctx, &entry, 0, data, NULL, NULL) < 0) {
        perror("spe_context_run");
    }

    if (spe_context_destroy(spe_ctx) != 0) {
        perror("spe_context_destroy");
    }

    pthread_exit(NULL);
}
/*
 * Prints the address of the prime array, then runs the embedded
 * spu_prime_handle program once on a single SPE context with no
 * arguments. Any libspe2 failure prints the failing call and exits with
 * status 1.
 */
int main(int argc, char **argv)
{
    unsigned int start = SPE_DEFAULT_ENTRY;
    spe_stop_info_t stop;
    spe_context_ptr_t spe;

    /* Display the EA of the array */
    printf("PPU array location: %#llx\n", (unsigned long long)prime);

    /* Create the SPE Context */
    spe = spe_context_create(0, NULL);
    if (spe == NULL) {
        perror("spe_context_create");
        exit(1);
    }

    /* Load the program into the context */
    if (spe_program_load(spe, &spu_prime_handle) != 0) {
        perror("spe_program_load");
        exit(1);
    }

    /* Run the program */
    if (spe_context_run(spe, &start, 0, NULL, NULL, &stop) < 0) {
        perror("spe_context_run");
        exit(1);
    }

    /* Deallocate the context */
    if (spe_context_destroy(spe) != 0) {
        perror("spe_context_destroy");
        exit(1);
    }

    return 0;
}
//PPU Code int main(void){ int retval; unsigned int entry_point = SPE_DEFAULT_ENTRY; // Required for continuing //execution, SPE_DEFAULT_ENTRY is the standard starting offset. spe_context_ptr_t my_context; spe_stop_info_t stopinfo; int stop_counter = 0; spe_callback_handler_register(null_callback, 0x11, SPE_CALLBACK_NEW); while(true) { // Create the SPE Context my_context = spe_context_create(SPE_EVENTS_ENABLE|SPE_MAP_PS, NULL); // Load the embedded code into this context spe_program_load(my_context, &spe_program_zero); entry_point = SPE_DEFAULT_ENTRY; do { printf("before running the spu code\n"); retval = spe_context_run(my_context, &entry_point, 0, NULL, NULL, &stopinfo); /* consume the stop info so we don't get the spu_stop in loop bug */ spe_stop_info_read(my_context, &stopinfo); stop_counter++; printf("after running the spu code (%d)\n", stop_counter); printf("retval = %d\n", retval); if(retval == 0x10) /* spu_stop(0x10) is sent from the spe when the loop is done */ { break; } } while (retval > 0); // Run until exit or error spe_context_destroy(my_context); } printf("finished with computation\n"); }
void *ppu_pthread_function(void *thread_arg) { spe_context_ptr_t ctx; pointers_t *arg = (pointers_t *) thread_arg; /* Create SPE context */ if ((ctx = spe_context_create (0, NULL)) == NULL) { perror ("Failed creating context"); exit (1); } /* Load SPE program into context */ if (spe_program_load (ctx, &ex1_spu)) { perror ("Failed loading program"); exit (1); } pthread_t mbox_thread; if (pthread_create (&mbox_thread, NULL, &mailbox_pthread_function, &ctx)) { perror ("Failed creating thread"); exit (1); } /* Run SPE context */ unsigned int entry = SPE_DEFAULT_ENTRY; if (spe_context_run(ctx, &entry, 0, arg, (void*)sizeof(pointers_t), NULL) < 0) { perror ("Failed running context"); exit (1); } /* Destroy context */ if (spe_context_destroy (ctx) != 0) { perror("Failed destroying context"); exit (1); } return NULL; }
/**
 * Create an SPE context, load a program into it and record the run
 * parameters; no thread is started.
 *
 * @param spedata  Per-thread record; its spe_ctx, entry and argp members
 *                 are filled in here.
 * @param context  Handle of the SPE program to load (a program handle,
 *                 despite the parameter name).
 * @param myarg    Value stored in spedata->argp.
 * @return 1 on success, -1 on failure. On failure no SPE context is left
 *         behind: a context created here is destroyed again and
 *         spedata->spe_ctx is reset to NULL.
 */
int CreateSPEContext( PpuPthreadData_t *spedata, spe_program_handle_t *context, void *myarg )
{
    // create SPE context
    spedata->spe_ctx = spe_context_create ( 0, NULL );
    if ( spedata->spe_ctx == NULL ) {
        perror ( "Failed creating context" );
        return -1;
    }

    // Load program into context
    if ( spe_program_load ( spedata->spe_ctx, context ) ) {
        perror ( "Failed loading program" );
        // Release the context so that a failed call does not leak it.
        spe_context_destroy ( spedata->spe_ctx );
        spedata->spe_ctx = NULL;
        return -1;
    }

    // Initialize context run data
    spedata->entry = SPE_DEFAULT_ENTRY;
    spedata->argp = myarg;

    return 1;
}
void *spe_code_launch_6(void *data) { // printf("inside of thread function\n"); int retval; unsigned int entry_point = SPE_DEFAULT_ENTRY; /* Required for continuing execution, SPE_DEFAULT_ENTRY is the standard starting offset. */ spe_context_ptr_t my_context; // printf("before creating context\n"); /* Create the SPE Context */ my_context = spe_context_create(SPE_EVENTS_ENABLE|SPE_MAP_PS, NULL); // printf("context created\n"); /* Load the embedded code into this context */ spe_program_load(my_context, &spe_code); // printf("program loaded\n"); /* Run the SPE program until completion */ do { retval = spe_context_run(my_context, &entry_point, 0, spe6_Data, 6, NULL); } while (retval > 0); /* Run until exit or error */ spe_context_destroy(my_context); pthread_exit(NULL); }
/*
 * Create an SPE context for spe_data and load spe_data->program into it.
 *
 * On success spe_data->booted is set to 1 and 0 is returned. If either
 * the context creation or the program load fails, an SDL error is set and
 * -1 is returned; spe_data->booted is left untouched in that case.
 */
int
SPE_Boot(_THIS, spu_data_t * spe_data)
{
    deprintf(2, "[PS3->SPU] Create SPE Context: %s\n", spe_data->program_name);
    spe_data->ctx = spe_context_create(0, NULL);
    if (!spe_data->ctx) {
        deprintf(2, "[PS3->SPU] Failed creating SPE context: %s\n", spe_data->program_name);
        SDL_SetError("[PS3->SPU] Failed creating SPE context");
        return -1;
    }

    deprintf(2, "[PS3->SPU] Load Program into SPE: %s\n", spe_data->program_name);
    if (spe_program_load(spe_data->ctx, &spe_data->program) == 0) {
        /* Context exists and holds the program: the SPE counts as booted. */
        spe_data->booted = 1;
        deprintf(2, "[PS3->SPU] SPE boot successful\n");
        return 0;
    }

    deprintf(2, "[PS3->SPU] Failed loading program into SPE context: %s\n", spe_data->program_name);
    SDL_SetError("[PS3->SPU] Failed loading program into SPE context");
    return -1;
}
int main(int argc, char** argv) { double begin; double end; int errnum; size_t nthread = P; size_t i; size_t nvertex; unsigned int x; // sent to each SPU int code; // status; unsigned int reply; // from SPU arg_t data[nthread]; param_t param[nthread] A16; argc = argc; // to silence gcc... progname = argv[0]; nvertex = atoi(argv[2]); printf("nthread = %zu\n", nthread); printf("nvertex = %zu\n", nvertex); printf("ctx = %zu\n", sizeof(param_t)); printf("arg = %zu\n", sizeof(arg_t)); begin = sec(); for (i = 0; i < nthread; ++i) { param[i].proc = i; param[i].nvertex = nvertex; if ((data[i].ctx = spe_context_create (0, NULL)) == NULL) { perror ("Failed creating context"); exit(1); } if (spe_program_load (data[i].ctx, &dataflow)) { perror ("Failed loading program"); exit(1); } data[i].arg = ¶m[i]; printf("i=%d param=%p\n", i, data[i].arg); if (pthread_create (&data[i].pthread, NULL, work, &data[i])) { perror ("Failed creating thread"); exit(1); } } // send some data to each SPU and wait for a reply. x = 42; for (i = 0; i < nthread; ++i) { reply = 0; code = spe_out_mbox_read(data[i].ctx, &reply, 1); printf("spu-%d reply-0: %u\tcode: %d\n",i, reply, code); code = spe_in_mbox_write(data[i].ctx, &x, 1, 1); code = spe_out_mbox_read(data[i].ctx, &reply, 1); printf("spu-%d reply-1: %u\tcode: %d\n",i, reply, code); code = spe_out_mbox_read(data[i].ctx, &reply, 1); printf("spu-%d reply-2: %u\tcode: %d\n",i, reply, code); } end = sec(); printf("%1.3lf s\n", end-begin); for (i = 0; i < nthread; ++i) { printf("joining with PPU pthread %zu...\n", i); errnum = pthread_join(data[i].pthread, NULL); if (errnum != 0) syserror(errnum, "pthread_join failed"); if (spe_context_destroy (data[i].ctx) != 0) { perror("Failed destroying context"); exit(1); } } return 0; }
int main(int argc, char **argv) { int i; int ret; spe_context_ptr_t spe; spe_program_handle_t *prog; unsigned int entry; spe_stop_info_t stop_info; if (argc == 1) { fprintf(stderr, "usage: %s <spu_image>\n", argv[0]); return -1; } prog = spe_image_open(argv[1]); if (!prog) { perror("spe_image_open"); exit(1); } spe = spe_context_create(0, NULL); if (!spe) { perror("spe_context_create"); exit(1); } ret = spe_program_load(spe, prog); if (ret) { perror("spe_program_load"); exit(1); } abs_params.ea_in = (unsigned long) in; abs_params.ea_out = (unsigned long) out; abs_params.size = SIZE; entry = SPE_DEFAULT_ENTRY; ret = spe_context_run(spe, &entry, 0, &abs_params, NULL, &stop_info); if (ret < 0) { perror("spe_context_run"); exit(1); } ret = spe_context_destroy(spe); if (ret) { perror("spe_context_destroy"); exit(1); } ret = spe_image_close(prog); if (ret) { perror("spe_image_close"); exit(1); } for (i = 0; i < SIZE; i++) { printf("%5.0f ", i, out[i]); if ((i+1) % 4 == 0) printf("\n"); } return 0; }
///start the spus group (can be called at the beginning of each frame, to make sure that the right SPU program is loaded) void SpuLibspe2Support::internal_startSPU() { m_activeSpuStatus.resize(numThreads); for (int i=0; i < numThreads; i++) { if(data[i].context == NULL) { /* Create context */ if ((data[i].context = spe_context_create(0, NULL)) == NULL) { perror ("Failed creating context"); exit(1); } /* Load program into context */ if(spe_program_load(data[i].context, this->program)) { perror ("Failed loading program"); exit(1); } m_activeSpuStatus[i].m_status = Spu_Status_Startup; m_activeSpuStatus[i].m_taskId = i; m_activeSpuStatus[i].m_commandId = 0; m_activeSpuStatus[i].m_lsMemory.p = NULL; data[i].entry = SPE_DEFAULT_ENTRY; data[i].flags = 0; data[i].argp.p = &m_activeSpuStatus[i]; data[i].envp.p = NULL; /* Create thread for each SPE context */ if (pthread_create(&data[i].pthread, NULL, &ppu_pthread_function, &(data[i]) )) { perror ("Failed creating thread"); exit(1); } /* else { printf("started thread %d\n",i); }*/ } } for (int i=0; i < numThreads; i++) { if(data[i].context != NULL) { while( m_activeSpuStatus[i].m_status == Spu_Status_Startup) { // wait for spu to set up sched_yield(); } printf("Spu %d is ready\n", i); } } }
int main(int argc, char** argv) { /* Iterators */ int i, j, k; uint32_t block; /* Time (seconds) */ long t_0; long t_end; long dt; long steps; long iter; /* Emission control */ bool emflag = TRUE; /* Start wall clock timer */ timer_start(TIMER_WALLCLOCK); /* Initialize parallelization */ nprocs = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1); nprocs = nprocs > MAX_THREADS ? MAX_THREADS : nprocs; if(argc > 1) { i = atoi(argv[1]); if(i < 1) { fprintf(stderr, "Invalid number of SPUs: %d < 1.\n", i); exit(1); } if(i < nprocs) { nprocs = i; } else { printf("%d SPUs unavailable. Using %d instead.\n", i, nprocs); } } /* Create SPE threads */ for(i=0; i<nprocs; i++) { threads[i].argp = (void*)(&spe_argvs[i]); /* Create context */ if((threads[i].speid = spe_context_create(0, NULL)) == NULL) { fprintf(stderr, "Failed spe_context_create(errno=%d strerror=%s)\n", errno, strerror(errno)); exit(1); } /* Load program into context */ if(spe_program_load(threads[i].speid, &fixedgrid_spu)) { fprintf(stderr, "Failed spe_program_load(errno=%d strerror=%s)\n", errno, strerror(errno)); exit(1); } /* Create thread for each SPE context */ if(pthread_create(&threads[i].pthread, NULL, &ppu_pthread_function, &threads[i])) { fprintf(stderr, "Failed pthread_create(errno=%d strerror=%s)\n", errno, strerror(errno)); exit(1); } spe_set_status(i, SPE_STATUS_WAITING); } printf("\nRunning %d threads (%d SPU + 1 PPU).\n", (nprocs+1), nprocs); /* Allocate concentration memory */ //conc = _malloc_align(NROWS*NCOLS*sizeof(double), 7); //conc_buff = (double*)_malloc_align(MAX_THREADS*NY*sizeof(double), 7); /* Allocation wind vector filed memory */ //wind_u = _malloc_align(NROWS*NCOLS*sizeof(double), 7); //wind_v = _malloc_align(NROWS*NCOLS*sizeof(double), 7); //wind_u_buff = (double*)_malloc_align(MAX_THREADS*NY*sizeof(double), 7); //wind_v_buff = (double*)_malloc_align(MAX_THREADS*NY*sizeof(double), 7); /* Allocation diffusion tensor memory */ //diff = _malloc_align(NROWS*NCOLS*sizeof(double), 7); 
//diff_buff = (double*)_malloc_align(MAX_THREADS*NY*sizeof(double), 7); /* Initialize concentration data */ double_array_init(NROWS*NCOLS, conc, O3_INIT); /* Initialize wind field */ double_array_init(NROWS*NCOLS, wind_u, WIND_U_INIT); double_array_init(NROWS*NCOLS, wind_v, WIND_V_INIT); /* Initialize diffusion field */ double_array_init(NROWS*NCOLS, diff, DIFF_INIT); /* Initialize time */ t_0 = 0.0; t_end = year2sec(END_YEAR - START_YEAR) + day2sec(END_DOY - START_DOY) + hour2sec(END_HOUR - START_HOUR) + minute2sec(END_MIN - START_MIN); dt = STEP_SIZE; steps = (long)( (t_end - t_0)/dt ); /* Print startup banner */ print_start_banner(NX*DX, NY*DY, 0.0, t_end, steps); /* Store initial concentration */ write_conc(&(conc[0]), 0, 0); /* BEGIN CALCULATIONS */ for(iter = 1; iter <= steps; iter++) { emflag = iter*dt < 6*3600.0 ? TRUE : FALSE; timer_start(TIMER_ROW_DISCRET); /* Discretize rows 1/2 timestep */ block = NROWS / nprocs; for(i=0; i<nprocs; i++) { /* Configure SPE arguments */ spe_argvs[i].arg[0].u64 = (uint64_t)(&conc[i*block*NX]); spe_argvs[i].arg[1].u64 = (uint64_t)(&wind_u[i*block*NX]); spe_argvs[i].arg[2].u64 = (uint64_t)(&diff[i*block*NX]); spe_argvs[i].arg[3].dbl = dt/2; spe_argvs[i].arg[4].dbl = DX; spe_argvs[i].arg[5].u32[0] = NX; spe_argvs[i].arg[5].u32[1] = (i == nprocs - 1 ? 
block + NROWS % nprocs : block); //FIXME /* Signal SPE */ spe_set_status(i, SPE_STATUS_WORKING); } /* Wait for SPEs to finish */ wait_all_spes(); timer_stop(TIMER_ROW_DISCRET); timer_start(TIMER_COL_DISCRET); /* Discretize colums 1 timestep */ for(i=0; i<NCOLS; i++) { k = i % nprocs; while(spe_get_status(k) > 0) ; //intentional wait if(i >= nprocs) { timer_start(TIMER_ARRAY_COPY); for(j=0; j<NY; j++) { conc[i-nprocs + j*NX] = ccol[k*NY+j]; } timer_stop(TIMER_ARRAY_COPY); } timer_start(TIMER_ARRAY_COPY); for(j=0; j<NY; j++) { ccol[k*NY + j] = conc[i + j*NX]; wcol[k*NY + j] = wind_v[i + j*NX]; dcol[k*NY + j] = diff[i + j*NX]; } timer_stop(TIMER_ARRAY_COPY); // Configure SPE arguments spe_argvs[k].arg[0].u64 = (uint64_t)(&ccol[k*NY]); spe_argvs[k].arg[1].u64 = (uint64_t)(&wcol[k*NY]); spe_argvs[k].arg[2].u64 = (uint64_t)(&dcol[k*NY]); spe_argvs[k].arg[3].dbl = dt; spe_argvs[k].arg[4].dbl = DY; spe_argvs[k].arg[5].u32[0] = NY; spe_argvs[k].arg[5].u32[1] = 1; // Signal SPE spe_set_status(k, SPE_STATUS_WORKING); } /* Wait for SPEs to finish */ wait_all_spes(); timer_stop(TIMER_COL_DISCRET); timer_start(TIMER_ROW_DISCRET); /* Discretize rows 1/2 timestep */ block = NROWS / nprocs; for(i=0; i<nprocs; i++) { /* Configure SPE arguments */ spe_argvs[i].arg[0].u64 = (uint64_t)(&conc[i*block*NX]); spe_argvs[i].arg[1].u64 = (uint64_t)(&wind_u[i*block*NX]); spe_argvs[i].arg[2].u64 = (uint64_t)(&diff[i*block*NX]); spe_argvs[i].arg[3].dbl = dt/2; spe_argvs[i].arg[4].dbl = DX; spe_argvs[i].arg[5].u32[0] = NX; spe_argvs[i].arg[5].u32[1] = (i == nprocs - 1 ? block + NROWS % nprocs : block); //FIXME /* Signal SPE */ spe_set_status(i, SPE_STATUS_WORKING); } /* Wait for SPEs to finish */ wait_all_spes(); timer_stop(TIMER_ROW_DISCRET); /* * Could update wind field here... */ /* * Could update diffusion tensor here... 
*/ /* Add emissions */ if(emflag) { conc[SOURCE_Y*NX + SOURCE_X] += dt * (SOURCE_RATE) / (DX * DY * 1000.0); } /* Store concentration */ #ifdef WRITE_EACH_ITER write_conc(conc, iter, 0); #endif /* Indicate progress */ if(iter % 10 == 0) { printf("Iteration %ld of %ld. Time = %ld seconds.\n", iter, steps, iter*dt); } } /* END CALCULATIONS */ /* Wait for SPU-thread to complete execution. */ for(i=0; i<nprocs; i++) { spe_set_status(i, SPE_STATUS_STOPPED); if(pthread_join(threads[i].pthread, NULL)) { perror("Failed pthread_join"); exit(1); } } /* Store concentration */ write_conc(conc, iter-1, 0); /* Show final time */ printf("Final time: %ld seconds.\n", (iter-1)*dt); timer_stop(TIMER_WALLCLOCK); print_timer_summary("===PPU Timers==="); /* Cleanup and exit */ return 0; }
int main(int argc, char **argv) { int i, retval, spus; /* Determine number of available SPUs */ spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, 0); if (argc != 2) { printf("Usage: 'ppu_threads <1-%u>'\n", spus); exit(1); } else if ((atoi(argv[1]) < 1) || (atoi(argv[1]) > spus)) { printf("Usage: 'ppu_threads <1-%u>'\n", spus); exit(1); } else { spus = atoi(argv[1]); } /* Create a context and thread for each SPU */ for (i=0; i<spus; i++) { /* Create context */ if ((data[i].speid = spe_context_create(0, NULL)) == NULL) { perror("spe_context_create"); exit(1); } /* Load program into the context */ if ((retval = spe_program_load(data[i].speid, &spu_threads)) != 0) { perror("spe_program_load"); exit (1); } /* Initialize control block and thread data */ control_block = i; data[i].argp = (void*)control_block; /* Create thread */ if ((retval = pthread_create( &data[i].pthread, NULL, &ppu_pthread_function, &data[i])) != 0) { perror("pthread_create"); exit (1); } } /* Wait for the threads to finish processing */ for (i = 0; i < spus; i++) { if ((retval = pthread_join(data[i].pthread, NULL)) != 0) { perror("pthread_join"); exit (1); } if ((retval = spe_context_destroy (data[i].speid)) != 0) { perror("spe_context_destroy"); exit (1); } } return 0; }
float calc_integral(float start, float end, float delta) { int i; int ret; float sum = 0.0f; spe_program_handle_t *prog; spe_context_ptr_t spe[NUM_SPE]; pthread_t thread[NUM_SPE]; thread_arg_t arg[NUM_SPE]; prog = spe_image_open("integral_spe.elf"); if (!prog) { perror("spe_image_open"); exit(1); } for (i = 0; i < NUM_SPE; i++) { spe[i] = spe_context_create(0, NULL); if (!spe) { perror("spe_context_create"); exit(1); } ret = spe_program_load(spe[i], prog); if (ret) { perror("spe_program_load"); exit(1); } } for (i = 0; i < NUM_SPE; i++) { integral_params[i].start = start + (end-start)/NUM_SPE * i; integral_params[i].end = start + (end-start)/NUM_SPE * (i+1); integral_params[i].delta = delta; integral_params[i].sum = 0.0f; arg[i].spe = spe[i]; arg[i].integral_params = &integral_params[i]; ret = pthread_create(&thread[i], NULL, run_integral_spe, &arg[i]); if (ret) { perror("pthread_create"); exit(1); } } for (i = 0; i < NUM_SPE; i++) { pthread_join(thread[i], NULL); ret = spe_context_destroy(spe[i]); if (ret) { perror("spe_context_destroy"); exit(1); } } ret = spe_image_close(prog); if (ret) { perror("spe_image_close"); exit(1); } for (i = 0; i < NUM_SPE; i++) { printf("[PPE] sum = %f\n", integral_params[i].sum); sum += integral_params[i].sum; } return sum; }
int main( int argc, char *argv[] ) { int i, j, dummy; int tmi, tmj; pthread_t threads [ NUM_THREADS ]; spe_context_ptr_t spe_contexts[ NUM_THREADS ]; thread_args_t thread_args [ NUM_THREADS ]; int rows; dummy = argc; dummy = (int)argv; // initialize initial & final matrix for(i = 0; i < tsize; i++) { for(j = 0; j < tsize; j++) { tmi = tsize-i; tmj = tsize-j; Amatrix[i][j] = 3*tmi+tmj ; Bmatrix[i][j] = 3*tmi+tmj ; Cmatrix[i][j] = 0 ; Dmatrix[i][j] = 0 ; } } // perform multiply printf( "SPE: Multiply \n"); gettimeofday( &time0, &tzone ); // start jobs rows = ((tsize/32)+NUM_THREADS-1)/NUM_THREADS ; // determine amount of work each spe should do for (i = 0; i < NUM_THREADS; i++ ) { // set arguments args.Amat = (float (*)[tsize][tsize])Amatrix ; args.Bmat = (float (*)[tsize][tsize])Bmatrix ; args.Cmat = (float (*)[tsize][tsize])Cmatrix ; args.i_initial = i*rows ; spe_contexts[i] = spe_context_create( 0, NULL ); // (flags, gang) spe_program_load( spe_contexts[i], &multiply_spu ); thread_args[i].spe_context = spe_contexts[i]; thread_args[i].argp = &args ; thread_args[i].envp = NULL; pthread_create( &threads[i], NULL, &spe_thread, &thread_args[i] ); } // wait for tasks to complete for (i = 0; i < NUM_THREADS; i++) { pthread_join( threads[i], NULL ); } // wait for threads gettimeofday( &time1, &tzone ); // print time to complete sec = time1.tv_sec - time0.tv_sec ; usec = time1.tv_usec - time0.tv_usec ; if ( usec < 0 ) { sec--; usec+=1000000 ; } printf( "SPE: Multiply Done -- matrix[%d][%d]: time=%d.%06d\n", tsize, tsize, sec, usec); for (i = 0; i < NUM_THREADS; i++) { spe_context_destroy( spe_contexts[i] ); } // destroy threads // Check for correctness of final matrix { int error, i, j, k ; for(i = 0; i < tsize; i++) { for(j = 0; j < tsize; j++) { for(k = 0; k < tsize; k++) { Dmatrix[i][j] += Amatrix[i][k] * Bmatrix[k][j] ; } } } error = 0; for(i = 0; i < tsize; i++) { for(j = 0; j < tsize; j++) { if ( Cmatrix[i][j] != Dmatrix[i][j] ) error = 1 ; } } if (error) { 
printf("Error in Multiply.\n"); } else { printf("Multiply is correct.\n"); fflush(stdout); } } return 0; }