示例#1
0
/*
 * PPU-side worker thread for one SPE.
 *
 * Creates an SPE context, loads the lab8_spu program into it, runs it and
 * finally destroys the context. thread_arg is a struct package_t *; its
 * address is passed to the SPE program as argp and sizeof(struct package_t)
 * as envp. Every libspe2 failure is fatal for the whole process (exit(1)).
 */
void *ppu_pthread_function(void *thread_arg) {
	struct package_t *package = (struct package_t *) thread_arg;
	unsigned int entry_point = SPE_DEFAULT_ENTRY;
	spe_context_ptr_t spe;
	int run_status;

	/* Step 1: obtain a fresh SPE context. */
	spe = spe_context_create (0, NULL);
	if (spe == NULL) {
		perror ("Failed creating context");
		exit (1);
	}

	/* Step 2: put the SPE program into the context. */
	if (spe_program_load (spe, &lab8_spu) != 0) {
		perror ("Failed loading program");
		exit (1);
	}

	/*
	 * Step 3: run the program, handing over the address of the initial
	 * structure (argp) together with its size (envp).
	 */
	run_status = spe_context_run(spe, &entry_point, 0, (void *)package,
				     (void *)sizeof(struct package_t), NULL);
	if (run_status < 0) {
		perror ("Failed running context");
		exit (1);
	}

	/* Step 4: release the context. */
	if (spe_context_destroy (spe) != 0) {
		perror("Failed destroying context");
		exit (1);
	}

	pthread_exit(NULL);
}
示例#2
0
/*
 * Runs the lab10_spu program on one SPE. The SPE receives the address of
 * the global matrix v as argp and the value M as envp. Afterwards the PPU
 * prints v[0][0] and whether it equals the global `destination`.
 */
int main()
{
	unsigned int entry_point = SPE_DEFAULT_ENTRY;
	spe_context_ptr_t spe;
	int run_status;
	int matches;

	init_matrix();

	spe = spe_context_create(0, NULL);
	if (spe == NULL) {
		perror ("Failed creating context");
		exit (1);
	}

	if (spe_program_load(spe, &lab10_spu) != 0) {
		perror ("Failed loading program");
		exit (1);
	}

	printf("SPU:\n");
	run_status = spe_context_run(spe, &entry_point, 0, (void*)&v, (void*)M, NULL);
	if (run_status < 0) {
		perror ("Failed running context");
		exit (1);
	}

	if (spe_context_destroy(spe) != 0) {
		perror("Failed destroying context");
		exit (1);
	}

	/* Report what came back from the SPE and compare with the expected cell. */
	printf("PPU:\n");
	printf("received: %d %d \n", v[0][0].x, v[0][0].y);
	matches = (destination.x == v[0][0].x) && (destination.y == v[0][0].y);
	printf("correct: %d\n", matches);

	return 0;
}
示例#3
0
/*
 * Creates an SPE context for `spedata`, loads the SPE program `context`
 * (a program handle, despite its name) into it and starts a pthread that
 * runs PpuPthreadFunction with `spedata` as its argument.
 *
 * spedata  per-SPE bookkeeping; spe_ctx, entry, argp and pthread are set.
 * context  SPE program handle to load.
 * myarg    stored in spedata->argp for the thread function to use.
 *
 * Returns 1 on success, -1 on failure. On failure the context created here
 * is destroyed again and spedata->spe_ctx is reset to NULL, so a failed
 * call no longer leaks an SPE context.
 */
int CreateSPEThread( PpuPthreadData_t *spedata, spe_program_handle_t *context, void *myarg )
{
	// create SPE context
    if ( ( spedata->spe_ctx = spe_context_create ( 0, NULL ) ) == NULL )
	{
		perror ( "Failed creating context" );
        return -1;
    }

	// Load program into context
    if ( spe_program_load ( spedata->spe_ctx, context ) )
	{
		// Report first: the cleanup call below may overwrite errno.
		perror ( "Failed loading program" );
		spe_context_destroy ( spedata->spe_ctx );
		spedata->spe_ctx = NULL;
		return -1;
    }

    // Initialize context run data
    spedata->entry = SPE_DEFAULT_ENTRY;
    spedata->argp  = myarg;

    // Create the pthread that will drive this SPE context
    if ( pthread_create ( &spedata->pthread, NULL, &PpuPthreadFunction, spedata ) )
    {
      	// NOTE(review): pthread_create returns its error code and does not
      	// set errno, so the text perror appends here may be unrelated.
      	perror ( "Failed creating thread" );
      	spe_context_destroy ( spedata->spe_ctx );
      	spedata->spe_ctx = NULL;
      	return -1;
    }
    return 1;
}
/*
 * Minimal libspe2 driver: creates one SPE context, loads hello_spu into
 * its local store, runs it with no arguments and destroys the context.
 *
 * Returns -1 if the context cannot be created, -2 if the program cannot
 * be loaded, and 0 otherwise (a failed run is only reported via perror).
 */
int main()
{
  spe_stop_info_t stop_info;
  spe_context_ptr_t speid;
  unsigned int entry = SPE_DEFAULT_ENTRY;
  unsigned int flags = 0;
  void *argp = NULL;
  void *envp = NULL;
  int rc;

  speid = spe_context_create(0,NULL);
  if (speid == NULL) {
    perror("spe_context_create");
    return -1;
  }

  /* Place the SPE executable into the context's local store. */
  if (spe_program_load(speid, &hello_spu) != 0) {
    perror("spe_program_load");
    return -2;
  }

  /* Execute the SPE program; a failure is reported but not fatal. */
  rc = spe_context_run(speid, &entry, flags, argp, envp, &stop_info);
  if (rc < 0) {
    perror("spe_context_run");
  }

  /* Release the context. */
  spe_context_destroy(speid);

  return 0;
}
示例#5
0
/**
 * Create and start one PPU thread per SPE.
 *
 * For each of the G->nprocs entries of G->threads this fills in the SPE
 * environment (speid, nprocs, metptr), creates an SPE context, loads the
 * fixedgrid_spu program into it and starts a pthread running
 * spe_pthread_function on that entry. Any failure terminates the process
 * with exit(1).
 *
 * @param G Grid descriptor; G->nprocs is the number of threads to start and
 *          G->threads holds the per-thread state initialized here.
 */
void create_spe_pthreads(fixedgrid_t* G)
{
    uint32_t i;
    int rc;
    
    for(i=0; i<G->nprocs; i++) 
    {
        /* Configure environment */
        G->threads[i].envv.speid = i;
        G->threads[i].envv.nprocs = G->nprocs;
        /* NOTE(review): the cast keeps only 32 bits of the address; this
         * presumably relies on a 32-bit PPU build - confirm. */
        G->threads[i].envv.metptr = (uint32_t)(&G->threads[i].metrics);
        
        /* Create context */
        if((G->threads[i].speid = spe_context_create(0, NULL)) == NULL) 
        {
            fprintf(stderr, "Failed spe_context_create (errno=%d)\n", errno);
            exit(1);
        }
        
        /* Load program into context */
        if(spe_program_load(G->threads[i].speid, &fixedgrid_spu)) 
        {
            fprintf(stderr, "Failed spe_program_load (errno=%d)\n", errno);
            exit(1);
        }
        
        /* Create thread for each SPE context. pthread_create reports its
         * error through the return value and leaves errno untouched, so
         * print the returned code rather than errno. */
        rc = pthread_create(&G->threads[i].pthread, NULL, &spe_pthread_function, &G->threads[i]);
        if(rc != 0) 
        {
            fprintf(stderr, "Failed pthread_create (errno=%d)\n", rc);
            exit(1);
        }
        
        /* NOTE(review): the status is written after the thread has already
         * been started; confirm the thread function does not depend on it. */
        G->threads[i].status = SPE_STATUS_INIT;
    }
}
示例#6
0
int main(int argc, char **argv)
{
    int i;
    int ret;

    spe_context_ptr_t spe;
    spe_program_handle_t *prog;
    unsigned int entry;
    spe_stop_info_t stop_info;

    prog = spe_image_open("vec_abs_spe.elf");
    if (!prog) {
        perror("spe_image_open");
        exit(1);
    }

    spe = spe_context_create(0, NULL);
    if (!spe) {
        perror("spe_context_create");
        exit(1);
    }

    ret = spe_program_load(spe, prog);
    if (ret) {
        perror("spe_program_load");
        exit(1);
    }

    abs_params.ea_in  = (unsigned long) in;
    abs_params.ea_out = (unsigned long) out;
    abs_params.size   = SIZE;

    entry = SPE_DEFAULT_ENTRY;
    ret = spe_context_run(spe, &entry, 0, &abs_params, NULL, &stop_info);
    if (ret < 0) {
        perror("spe_context_run");
        exit(1);
    }

    ret = spe_context_destroy(spe);
    if (ret) {
        perror("spe_context_destroy");
        exit(1);
    }

    ret = spe_image_close(prog);
    if (ret) {
        perror("spe_image_close");
        exit(1);
    }

    for (i = 0; i < SIZE; i++) {
        printf("out[%02d]=%0.0f\n", i, out[i]);
    }

    return 0;
}
示例#7
0
/*
 * Runs increment_spe.elf on NUM_SPE SPEs in parallel.
 *
 * One context is created and loaded per SPE, then one pthread per context
 * executes run_increment_spe with a pointer to that context. After all
 * threads have been joined the contexts and the image are released and
 * counter[0] is printed.
 * Every failure is reported with perror and ends with exit(1).
 */
int main(int argc, char **argv)
{
    int i;
    int ret;
 
    spe_context_ptr_t spe[NUM_SPE];
    spe_program_handle_t *prog;
    pthread_t thread[NUM_SPE];

    prog = spe_image_open("increment_spe.elf");
    if (!prog) {
        perror("spe_image_open");
        exit(1);
    }

    for (i = 0; i < NUM_SPE; i++) {
        spe[i] = spe_context_create(0, NULL);
        /* Test the element just created; the array itself is never NULL,
         * so checking `spe` would let a failed creation go unnoticed. */
        if (!spe[i]) {
            perror("spe_context_create");
            exit(1);
        }

        ret = spe_program_load(spe[i], prog);
        if (ret) {
            perror("spe_program_load");
            exit(1);
        }
    }

    for (i = 0; i < NUM_SPE; i++) {
        ret = pthread_create(&thread[i], NULL, run_increment_spe, &spe[i]);
        if (ret) {
            perror("pthread_create");
            exit(1);
        }
    }

    /* Wait for each worker before tearing down the context it uses. */
    for (i = 0; i < NUM_SPE; i++) {
        pthread_join(thread[i], NULL);
        ret = spe_context_destroy(spe[i]);
        if (ret < 0) {
            perror("spe_context_destroy");
            exit(1);
        }
    }

    ret = spe_image_close(prog);
    if (ret) {
        perror("spe_image_close");
        exit(1);
    }

    printf("result=%d\n", counter[0]);

    return 0;
}
示例#8
0
/**
 * Create the SPU threads.  This is done once during driver initialization.
 * This involves setting the "init" message which is sent to each SPU.
 * The init message specifies an SPU id, total number of SPUs, location
 * and number of batch buffers, etc.
 *
 * A second call aborts the process, because multiple rendering contexts
 * are not supported. Failure to create a context, load the g3d_spu program
 * or start a thread terminates the process with exit(1).
 *
 * \param cell  context supplying num_spus, debug_flags, the batch buffers,
 *              the buffer status array and the SPU function table.
 */
void
cell_start_spus(struct cell_context *cell)
{
   static boolean one_time_init = FALSE;
   uint i, j;
   uint timebase = get_timebase();

   if (one_time_init) {
      fprintf(stderr, "PPU: Multiple rendering contexts not yet supported "
	      "on Cell.\n");
      abort();
   }

   one_time_init = TRUE;

   assert(cell->num_spus <= CELL_MAX_SPUS);

   ASSERT_ALIGN16(&cell_global.inits[0]);
   ASSERT_ALIGN16(&cell_global.inits[1]);

   /*
    * Initialize the global 'inits' structure for each SPU.
    * A pointer to the init struct will be passed to each SPU.
    * The SPUs will then each grab their init info with mfc_get().
    */
   for (i = 0; i < cell->num_spus; i++) {
      cell_global.inits[i].id = i;
      cell_global.inits[i].num_spus = cell->num_spus;
      cell_global.inits[i].debug_flags = cell->debug_flags;
      cell_global.inits[i].inv_timebase = 1000.0f / timebase;

      for (j = 0; j < CELL_NUM_BUFFERS; j++) {
         cell_global.inits[i].buffers[j] = cell->buffer[j];
      }
      cell_global.inits[i].buffer_status = &cell->buffer_status[0][0][0];

      cell_global.inits[i].spu_functions = &cell->spu_functions;

      cell_global.spe_contexts[i] = spe_context_create(0, NULL);
      if (!cell_global.spe_contexts[i]) {
         fprintf(stderr, "spe_context_create() failed\n");
         exit(1);
      }

      if (spe_program_load(cell_global.spe_contexts[i], &g3d_spu)) {
         fprintf(stderr, "spe_program_load() failed\n");
         exit(1);
      }
      
      /* A thread that failed to start would leave its SPU idle and an
       * invalid handle in spe_threads[i], so treat it like the other
       * setup failures. */
      if (pthread_create(&cell_global.spe_threads[i], /* returned thread handle */
                         NULL,                        /* pthread attribs */
                         &cell_thread_function,       /* start routine */
                         &cell_global.inits[i])) {    /* thread argument */
         fprintf(stderr, "pthread_create() failed\n");
         exit(1);
      }
   }
}
示例#9
0
/*
 * Starts one PPU thread per usable SPE (at most MAX_SPU_THREADS), each
 * running ppu_pthread_function on a context loaded with simple_spu, then
 * joins the threads and destroys the contexts.
 * Every failure is reported with perror and ends with exit(1).
 */
int main()
{
	spe_context_ptr_t contexts[MAX_SPU_THREADS];
	pthread_t workers[MAX_SPU_THREADS];
	int count;
	int n;

	/* Number of SPE threads: whatever is usable, capped by our arrays. */
	count = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1);
	if (count > MAX_SPU_THREADS) {
		count = MAX_SPU_THREADS;
	}

	/* Bring up one context and one driver thread per SPE. */
	for (n = 0; n < count; n++) {
		contexts[n] = spe_context_create (0, NULL);
		if (contexts[n] == NULL) {
			perror ("Failed creating context");
			exit (1);
		}

		if (spe_program_load (contexts[n], &simple_spu) != 0) {
			perror ("Failed loading program");
			exit (1);
		}

		if (pthread_create (&workers[n], NULL, &ppu_pthread_function, &contexts[n]) != 0) {
			perror ("Failed creating thread");
			exit (1);
		}
	}

	/* Wait for every driver thread, then release its context. */
	for (n = 0; n < count; n++) {
		if (pthread_join (workers[n], NULL) != 0) {
			perror("Failed pthread_join");
			exit (1);
		}

		if (spe_context_destroy (contexts[n]) != 0) {
			perror("Failed destroying context");
			exit (1);
		}
	}

	printf("\nThe program has successfully executed.\n");
	return 0;
}
示例#10
0
/*
 * Thread body: loads coremaker_spu into the SPE context that `arg` points
 * to and runs it with no arguments. Return values of the libspe2 calls are
 * not inspected.
 */
void *
spe_thread (void * arg)
{
  spe_context_ptr_t *context_slot = (spe_context_ptr_t *) arg;
  unsigned int start = SPE_DEFAULT_ENTRY;
  int run_flags = 0;

  spe_program_load (*context_slot, &coremaker_spu);
  spe_context_run (*context_slot, &start, run_flags, NULL, NULL, NULL);

  pthread_exit (NULL);
}
示例#11
0
/*
 * Loads print_param_spe.elf onto one SPE and runs it, passing the integer
 * value 12345678 itself (not an address) in the argp slot.
 * Every libspe2 failure is reported with perror and ends with exit(1).
 */
int main(int argc, char **argv)
{
    int ret;

    spe_context_ptr_t spe;
    spe_program_handle_t *prog;
    unsigned int entry;
    spe_stop_info_t stop_info;

    unsigned long param;

    prog = spe_image_open("print_param_spe.elf");
    if (!prog) {
        perror("spe_image_open");
        exit(1);
    }

    spe = spe_context_create(0, NULL);
    if (!spe) {
        perror("spe_context_create");
        exit(1);
    }

    ret = spe_program_load(spe, prog);
    if (ret) {
        perror("spe_program_load");
        exit(1);
    }

    param = 12345678;
    /* param is unsigned long, so it is printed with %lu rather than %ld. */
    printf("[PPE] param=%lu\n", param);

    entry = SPE_DEFAULT_ENTRY;
    /* The value travels in the pointer-sized argp slot. */
    ret = spe_context_run(spe, &entry, 0, (void *) param, NULL, &stop_info);
    if (ret < 0) {
        perror("spe_context_run");
        exit(1);
    }

    ret = spe_context_destroy(spe);
    if (ret) {
        perror("spe_context_destroy");
        exit(1);
    }

    ret = spe_image_close(prog);
    if (ret) {
        perror("spe_image_close");
        exit(1);
    }

    return 0;
}
示例#12
0
/*
 * Creates a new SPE context, loads bt2_spu into it and runs it with no
 * arguments. Both parameters are unused, return values of the libspe2
 * calls are not inspected and the context is not destroyed here.
 * Always returns 0.
 */
int
indirect_handler (unsigned char *base, unsigned long offset)
{
  spe_context_ptr_t spe = spe_context_create (0, NULL);
  unsigned int start = SPE_DEFAULT_ENTRY;
  int run_flags = 0;

  spe_program_load (spe, &bt2_spu);
  spe_context_run (spe, &start, run_flags, NULL, NULL, NULL);

  return 0;
}
示例#13
0
/*
 * Start the SPU threads.
 *
 * spu_threads  requested number of SPU threads; a negative value means
 *              "use every usable SPE". Requests above the usable count are
 *              clamped with a warning.
 * spu_data     receives the thread count, a freshly allocated spus array,
 *              the gang context and, per SPU, its context and boot thread.
 *
 * Each SPU gets a context inside one gang context, the cellspu_bootloader
 * program and a pthread running spu_bootstrap_thread. Any failure,
 * including failure to allocate the spus array, ends with exit(1).
 */
void startSpuThreads(int spu_threads, SpuThreadData * spu_data) {

	int i, no_spus;

	/* Determine the number of SPE threads to create */
  no_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1);

	if (spu_threads < 0) {
		spu_threads = no_spus;
	} else if (no_spus < spu_threads) {
		spu_threads = no_spus;
		printf("Warning: Only %i Cell SPU processors available\n", spu_threads);
	}

	spu_data->no_spu_threads = spu_threads;
	spu_data->spus = malloc(sizeof(SpuData) * spu_threads);

	/*
	 * Without the array the loop below would dereference NULL, so an
	 * allocation failure is fatal. A zero-sized request may legitimately
	 * yield NULL and is not an error.
	 */
	if (spu_data->spus == NULL && spu_threads != 0) {
		perror("Failed to allocate SPU data for threads");
		exit(1);
	}

	printf("Bringing up %i Cell SPU threads\n", spu_threads);

	/* create the context gang */
	if ((spu_data->gang = spe_gang_context_create(0)) == NULL) {
		perror("Failed creating Cell SPU gang context");
		exit(1);
	}

	for(i=0; i<spu_threads; i++) {
		/* Create context */
		if ((spu_data->spus[i].ctx = spe_context_create (CTX_FLAGS, spu_data->gang)) == NULL) {
			perror ("Failed creating Cell SPU context");
			exit (1);
		}

		/* load bootloader into spu's */
		if (spe_program_load (spu_data->spus[i].ctx, &cellspu_bootloader)) {
			perror ("Failed loading Cell SPU bootloader");
			exit (1);
		}

		/* create a thread for each SPU */
		if (pthread_create (&(spu_data->spus[i].boot_thread),
												NULL,
												&spu_bootstrap_thread,
												&(spu_data->spus[i].ctx))) {
			perror ("Failed creating Cell SPU thread");
			exit (1);
		}
	}
}
示例#14
0
/** Sends a SPECommand to the SPE.
 *
 * With MAIL defined the command address is written to the mailbox and the
 * mailbox reply is returned. Otherwise the file-level spe_context is created
 * and loaded with spe_dynprogr_handle on first use, and the command is
 * passed as argp to spe_context_run, whose result is returned.
 *
 * If the context cannot be created or the program cannot be loaded, -1 is
 * returned and no half-initialized context is kept, so a later call starts
 * the setup again.
 *
 * \param command A SPEcommand.
 * \return Returns nonzero on error.
 */
static int submitSPECommand(SPECommand* command) {
#ifdef MAIL
	/* Call the SPU Program*/
	writeMailBox( (ppu_addr_t)command );
	return readMailBox();
#else
	if( !spe_context ){
		spe_context = spe_context_create( 0, NULL );
		if( !spe_context )
			return -1;

		if( spe_program_load( spe_context, &spe_dynprogr_handle ) ){
			/* Do not cache a context that has no program in it. */
			spe_context_destroy( spe_context );
			spe_context = NULL;
			return -1;
		}
	}
	
	unsigned int entry = SPE_DEFAULT_ENTRY;
	/* The context is deliberately kept alive for reuse by later calls. */
	return spe_context_run( spe_context, &entry, 0, command, NULL, NULL );
#endif
}
示例#15
0
/*
 * Thread body for the YUV scaler SPE. `arg` is a struct yuvscaler_s *
 * carrying the context and the run parameters (entry, runflags, argp,
 * envp). Loads spu_yuvscaler_handle into the context and runs it; if the
 * load reports a negative result the thread exits after a perror.
 */
static void * sws_spe_thread(void * arg) 
{
	struct yuvscaler_s *scaler = (struct yuvscaler_s *) arg;
	int load_status;

	load_status = spe_program_load(scaler->ctx, &spu_yuvscaler_handle);
	if (load_status < 0) {
		perror("error loading program");
		pthread_exit(NULL);
	}

	spe_context_run(scaler->ctx, &scaler->entry, scaler->runflags,
			scaler->argp, scaler->envp, NULL);
	pthread_exit(NULL);
}
/*
 * Returns an SPE context that has `program` loaded, reusing a cached
 * context across calls.
 *
 * The cache holds one context together with the program and the pid it
 * was created for. A cached context seen from a different pid aborts the
 * process. A cached context for a different program is destroyed and
 * replaced. Failure to create a context or to load the program also aborts.
 */
spe_context_ptr_t ps3_assign_context_to_program(spe_program_handle_t *program)
{
  static spe_context_ptr_t      cached_context;
  static spe_program_handle_t  *cached_program;
  static int                    cached_pid;

  int my_pid   = getpid();
  int spe_slot = 99; /* Todo: get true cruncher index */
  int load_rc;

  /* A context inherited across fork() must never be used. */
  if (cached_context != NULL && cached_pid != my_pid)
  {
    Log("!!! FATAL !!! Cached SPE context forked from another pid (%d)\n", cached_pid);
    abort();
  }

  /* Program changed: throw the old context away. */
  if (cached_context != NULL && cached_program != program)
  {
    if (spe_context_destroy(cached_context) != 0)
    {
      Log("Alert SPE%d! spe_context_destroy() failed, errno=%d\n", spe_slot, errno);
    }
    cached_context = NULL;
  }

  /* Cache hit: same pid, same program. */
  if (cached_context != NULL)
  {
    return cached_context;
  }

  cached_context = spe_context_create(0, NULL);
  if (cached_context == NULL)
  {
    Log("Alert SPE#%d! spe_context_create() failed\n", spe_slot);
    abort();
  }

  load_rc = spe_program_load(cached_context, program);
  if (load_rc != 0)
  {
    Log("Alert SPE#%d: spe_program_load() returned %d\n", spe_slot, load_rc);
    abort();
  }

  cached_program = program;
  cached_pid     = my_pid;

  return cached_context;
}
/*
 * Thread body: creates an SPE context, loads spu_pi into it, runs it with
 * `arg` (a context *) as argp and destroys the SPE context afterwards.
 * Return values of the libspe2 calls are not inspected.
 */
void *pthread_run_spe(void *arg){
  context *job = (context *)arg;
  unsigned int start = SPE_DEFAULT_ENTRY;
  spe_context_ptr_t spe;
  void *spe_argument;

  spe = spe_context_create(0, NULL);
  spe_program_load (spe, &spu_pi);

  spe_argument = job;
  spe_context_run(spe, &start, 0, spe_argument, NULL, NULL);

  spe_context_destroy(spe);

  pthread_exit(NULL);
}
示例#18
0
/*
 * Prints the effective address of the global `prime` array, then loads
 * spu_prime_handle onto one SPE and runs it with no arguments.
 * Every libspe2 failure is reported with perror and ends with exit(1).
 */
int main(int argc, char **argv) {
   spe_stop_info_t stop_info;
   spe_context_ptr_t spe;
   unsigned int start = SPE_DEFAULT_ENTRY;
   int status;

   /* Show where the array lives on the PPU side */
   printf("PPU array location: %#llx\n", 
      (unsigned long long)prime);

   /* Context */
   spe = spe_context_create(0, NULL);
   if (spe == NULL) {
      perror("spe_context_create");
      exit(1);
   }

   /* Program */
   status = spe_program_load(spe, &spu_prime_handle);
   if (status != 0) {
      perror("spe_program_load");
      exit(1);
   }

   /* Execution */
   status = spe_context_run(spe, &start, 0, NULL, NULL, &stop_info);
   if (status < 0) {
      perror("spe_context_run");
      exit(1);
   }

   /* Teardown */
   status = spe_context_destroy(spe);
   if (status != 0) {
      perror("spe_context_destroy");
      exit(1);
   }

   return 0;
}
示例#19
0
// PPU code.
//
// Registers null_callback for callback number 0x11, then repeatedly creates
// an SPE context, loads spe_program_zero into it and re-runs the context
// until spe_context_run returns 0x10 or a value <= 0, and destroys the
// context again.
//
// NOTE(review): the `break` below leaves only the inner do/while; the outer
// while(true) has no exit, so the final printf is unreachable as written.
// Presumably this is an endurance/reproduction harness - confirm.
int main(void){
	int retval;
	unsigned int entry_point = SPE_DEFAULT_ENTRY; // Required for continuing
	  //execution, SPE_DEFAULT_ENTRY is the standard starting offset.
	spe_context_ptr_t my_context;
	spe_stop_info_t stopinfo;	
	int stop_counter = 0;

 	spe_callback_handler_register(null_callback, 0x11, SPE_CALLBACK_NEW);

  while(true) {
	  // Create the SPE Context
	  // (the result is not checked for NULL before use)
	  my_context = spe_context_create(SPE_EVENTS_ENABLE|SPE_MAP_PS, NULL);

	  // Load the embedded code into this context
	  spe_program_load(my_context, &spe_program_zero);	
  
    // Start every fresh context from the default entry point; the inner
    // loop then passes the same variable back to spe_context_run each time.
    entry_point = SPE_DEFAULT_ENTRY;	

	  do {
		  printf("before running the spu code\n");
		  retval = spe_context_run(my_context, &entry_point, 0, NULL, NULL, &stopinfo);
      /* consume the stop info so we don't get the spu_stop in loop bug */
      spe_stop_info_read(my_context, &stopinfo);
		  // stop_counter counts runs across all contexts; it is never reset.
		  stop_counter++;
		  printf("after running the spu code (%d)\n", stop_counter);
      printf("retval = %d\n", retval);
   
      if(retval == 0x10) /* spu_stop(0x10) is sent from the spe when the loop is done */
       {
         break;
       }
	  } while (retval > 0); // Run until exit or error

    spe_context_destroy(my_context);
  }
	printf("finished with computation\n");
}
示例#20
0
文件: ex1_ppu.c 项目: flaviusone/ASC
void *ppu_pthread_function(void *thread_arg) {

	spe_context_ptr_t ctx;
	pointers_t *arg = (pointers_t *) thread_arg;

	/* Create SPE context */
	if ((ctx = spe_context_create (0, NULL)) == NULL) {
                perror ("Failed creating context");
                exit (1);
        }

	/* Load SPE program into context */
	if (spe_program_load (ctx, &ex1_spu)) {
                perror ("Failed loading program");
                exit (1);
        }

        pthread_t mbox_thread;
	if (pthread_create (&mbox_thread, NULL, &mailbox_pthread_function, &ctx))  {
                perror ("Failed creating thread");
                exit (1);
        }

	/* Run SPE context */
	unsigned int entry = SPE_DEFAULT_ENTRY;
	if (spe_context_run(ctx, &entry, 0, arg, (void*)sizeof(pointers_t), NULL) < 0) {  
		perror ("Failed running context");
		exit (1);
	}

	/* Destroy context */
	if (spe_context_destroy (ctx) != 0) {
                perror("Failed destroying context");
                exit (1);
        }

        return NULL;
}
示例#21
0
/*
 * Creates an SPE context for `spedata`, loads the SPE program `context`
 * (a program handle, despite its name) into it and records the run
 * parameters (default entry point and `myarg` as argp). No thread is
 * started here.
 *
 * Returns 1 on success, -1 on failure. If the program cannot be loaded the
 * context created here is destroyed and spedata->spe_ctx is reset to NULL,
 * so a failed call no longer leaks an SPE context.
 */
int CreateSPEContext( PpuPthreadData_t *spedata, spe_program_handle_t *context, void *myarg )
{
	// create SPE context
    if ( ( spedata->spe_ctx = spe_context_create ( 0, NULL ) ) == NULL )
	{
		perror ( "Failed creating context" );
        return -1;
    }

	// Load program into context
    if ( spe_program_load ( spedata->spe_ctx, context ) )
	{
		// Report first: the cleanup call below may overwrite errno.
		perror ( "Failed loading program" );
		spe_context_destroy ( spedata->spe_ctx );
		spedata->spe_ctx = NULL;
		return -1;
    }

    // Initialize context run data
    spedata->entry = SPE_DEFAULT_ENTRY;
    spedata->argp  = myarg;

    return 1;
}
示例#22
0
/*
 * Thread body for SPE number 6: creates an SPE context, loads spe_code
 * into it and re-runs the context for as long as spe_context_run returns a
 * positive value, passing spe6_Data as argp and the value 6 in the envp
 * slot. The `data` parameter is unused.
 *
 * If the context cannot be created or the program cannot be loaded the
 * thread exits without running anything.
 */
void *spe_code_launch_6(void *data) 
{
	int retval;
	unsigned int entry_point = SPE_DEFAULT_ENTRY; /* Required for continuing 
      execution, SPE_DEFAULT_ENTRY is the standard starting offset. */
	spe_context_ptr_t my_context;

	(void) data;

	/* Create the SPE Context */
	my_context = spe_context_create(SPE_EVENTS_ENABLE|SPE_MAP_PS, NULL);
	if (my_context == NULL)
	{
		pthread_exit(NULL);
	}

	/* Load the embedded code into this context */
	if (spe_program_load(my_context, &spe_code) != 0)
	{
		spe_context_destroy(my_context);
		pthread_exit(NULL);
	}

	/* Run the SPE program until completion */
	do 
	{	
		/* envp is a void *; the SPE number needs an explicit cast because
		 * C has no implicit integer-to-pointer conversion. */
		retval = spe_context_run(my_context, &entry_point, 0, spe6_Data, (void *) 6, NULL);
	} 
	while (retval > 0); /* Run until exit or error */
	spe_context_destroy(my_context);	
	pthread_exit(NULL);
}
示例#23
0
/*
 * Boots one SPE: creates a context in spe_data->ctx and loads
 * spe_data->program into it. On success spe_data->booted is set to 1 and 0
 * is returned; on failure an SDL error is set and -1 is returned.
 */
int SPE_Boot(_THIS, spu_data_t * spe_data)
{
	/* Stage 1: context */
	deprintf(2, "[PS3->SPU] Create SPE Context: %s\n", spe_data->program_name);
	spe_data->ctx = spe_context_create(0, NULL);
	if (!spe_data->ctx) {
		deprintf(2, "[PS3->SPU] Failed creating SPE context: %s\n", spe_data->program_name);
		SDL_SetError("[PS3->SPU] Failed creating SPE context");
		return -1;
	}

	/* Stage 2: program */
	deprintf(2, "[PS3->SPU] Load Program into SPE: %s\n", spe_data->program_name);
	if (spe_program_load(spe_data->ctx, &spe_data->program) != 0) {
		deprintf(2, "[PS3->SPU] Failed loading program into SPE context: %s\n", spe_data->program_name);
		SDL_SetError("[PS3->SPU] Failed loading program into SPE context");
		return -1;
	}

	/* Both stages done: mark the SPE as usable. */
	spe_data->booted = 1;
	deprintf(2, "[PS3->SPU] SPE boot successful\n");

	return 0;
}
示例#24
0
/*
 * Starts P PPU threads, each driving one SPE loaded with the `dataflow`
 * program, exchanges a few mailbox words with every SPE, prints the
 * elapsed time and finally joins the threads and destroys the contexts.
 *
 * The vertex count is read from argv[2]; the program exits with a usage
 * message when fewer than two arguments are given.
 */
int main(int argc, char** argv) 
{
	double		begin;
	double		end;
	int		errnum;
	size_t		nthread = P;
	size_t		i;
	size_t		nvertex;
	unsigned int	x;		// sent to each SPU
	int		code;		// status;
	unsigned int	reply;		// from SPU
	arg_t		data[nthread];
	param_t		param[nthread] A16;

	progname	= argv[0];

	/* argv[2] is read below, so it must exist.
	 * NOTE(review): the meaning of argv[1] is not visible here. */
	if (argc < 3) {
		fprintf(stderr, "usage: %s <arg1> <nvertex>\n", argv[0]);
		exit(1);
	}
	nvertex		= atoi(argv[2]);

	printf("nthread   = %zu\n", nthread);
	printf("nvertex   = %zu\n", nvertex);
	printf("ctx   = %zu\n", sizeof(param_t));
	printf("arg   = %zu\n", sizeof(arg_t));

	begin = sec();

	for (i = 0; i < nthread; ++i) {
		param[i].proc = i;
		param[i].nvertex = nvertex;

		if ((data[i].ctx = spe_context_create (0, NULL)) == NULL) {
			perror ("Failed creating context");
			exit(1);
		}

		if (spe_program_load (data[i].ctx, &dataflow))  {
			perror ("Failed loading program");
			exit(1);
		}

		data[i].arg = &param[i];
		/* i is a size_t, so it is printed with %zu; %p expects a void *. */
		printf("i=%zu param=%p\n", i, (void *) data[i].arg);

		if (pthread_create (&data[i].pthread, NULL, work, &data[i])) {
			perror ("Failed creating thread");
			exit(1);
		}
	}

	// send some data to each SPU and wait for a reply.

	x = 42;

	for (i = 0; i < nthread; ++i) {
		reply = 0;
		code = spe_out_mbox_read(data[i].ctx, &reply, 1);
		printf("spu-%zu reply-0: %u\tcode: %d\n", i, reply, code);
		code = spe_in_mbox_write(data[i].ctx, &x, 1, 1);
		code = spe_out_mbox_read(data[i].ctx, &reply, 1);
		printf("spu-%zu reply-1: %u\tcode: %d\n", i, reply, code);
		code = spe_out_mbox_read(data[i].ctx, &reply, 1);
		printf("spu-%zu reply-2: %u\tcode: %d\n", i, reply, code);
	}

	end = sec();

	printf("%1.3lf s\n", end-begin);

	for (i = 0; i < nthread; ++i) {
		printf("joining with PPU pthread %zu...\n", i);
		errnum = pthread_join(data[i].pthread, NULL);
		if (errnum != 0)
			syserror(errnum, "pthread_join failed");

		if (spe_context_destroy (data[i].ctx) != 0) {
			perror("Failed destroying context");
			exit(1);
		}
	}

	return 0;
}
示例#25
0
/*
 * Loads the SPE image named by argv[1] onto one SPE, runs it with the
 * global abs_params block (input address, output address, element count)
 * as argp, then prints the global `out` array four values per line.
 *
 * Returns -1 with a usage message when no image is given; every libspe2
 * failure is reported with perror and ends with exit(1).
 */
int main(int argc, char **argv)
{
    int i;
    int ret;

    spe_context_ptr_t spe;
    spe_program_handle_t *prog;
    unsigned int entry;
    spe_stop_info_t stop_info;

    if (argc == 1) {
        fprintf(stderr, "usage: %s <spu_image>\n", argv[0]);
        return -1;
    }

    prog = spe_image_open(argv[1]);
    if (!prog) {
        perror("spe_image_open");
        exit(1);
    }

    spe = spe_context_create(0, NULL);
    if (!spe) {
        perror("spe_context_create");
        exit(1);
    }

    ret = spe_program_load(spe, prog);
    if (ret) {
        perror("spe_program_load");
        exit(1);
    }

    abs_params.ea_in  = (unsigned long) in;
    abs_params.ea_out = (unsigned long) out;
    abs_params.size   = SIZE;

    entry = SPE_DEFAULT_ENTRY;
    ret = spe_context_run(spe, &entry, 0, &abs_params, NULL, &stop_info);
    if (ret < 0) {
        perror("spe_context_run");
        exit(1);
    }

    ret = spe_context_destroy(spe);
    if (ret) {
        perror("spe_context_destroy");
        exit(1);
    }

    ret = spe_image_close(prog);
    if (ret) {
        perror("spe_image_close");
        exit(1);
    }

    for (i = 0; i < SIZE; i++) {
        /* The format has a single %f conversion, so only out[i] is passed;
         * passing the int index first made printf read an int as a double. */
        printf("%5.0f ", out[i]);
        if ((i+1) % 4 == 0) printf("\n");
    }

    return 0;
}
示例#26
0
///start the spus group (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
///
/// For every slot without a context this creates one, loads this->program,
/// resets the slot's status record and starts a pthread running
/// ppu_pthread_function. It then waits, yielding the CPU, until each slot
/// with a context has left the Spu_Status_Startup state.
/// Any setup failure is reported with perror and ends with exit(1).
void SpuLibspe2Support::internal_startSPU()
{
	m_activeSpuStatus.resize(numThreads);

	// Pass 1: bring up every slot that does not own a context yet.
	for (int spu = 0; spu < numThreads; ++spu)
	{
		if (data[spu].context != NULL)
		{
			continue;
		}

		/* Create context */
		data[spu].context = spe_context_create(0, NULL);
		if (data[spu].context == NULL)
		{
			perror ("Failed creating context");
			exit(1);
		}

		/* Load program into context */
		if (spe_program_load(data[spu].context, this->program) != 0)
		{
			perror ("Failed loading program");
			exit(1);
		}

		// Status record handed to the SPU as argp.
		m_activeSpuStatus[spu].m_status = Spu_Status_Startup;
		m_activeSpuStatus[spu].m_taskId = spu;
		m_activeSpuStatus[spu].m_commandId = 0;
		m_activeSpuStatus[spu].m_lsMemory.p = NULL;

		// Run parameters for the PPU thread.
		data[spu].entry = SPE_DEFAULT_ENTRY;
		data[spu].flags = 0;
		data[spu].argp.p = &m_activeSpuStatus[spu];
		data[spu].envp.p = NULL;

		/* Create thread for each SPE context */
		if (pthread_create(&data[spu].pthread, NULL, &ppu_pthread_function, &(data[spu])) != 0)
		{
			perror ("Failed creating thread");
			exit(1);
		}
	}

	// Pass 2: block until every live slot reports that it finished starting.
	for (int spu = 0; spu < numThreads; ++spu)
	{
		if (data[spu].context == NULL)
		{
			continue;
		}

		while (m_activeSpuStatus[spu].m_status == Spu_Status_Startup)
		{
			// wait for spu to set up
			sched_yield();
		}
		printf("Spu %d is ready\n", spu);
	}
}
示例#27
0
/*
 * Fixed-grid driver running on the PPU.
 *
 * Starts one PPU thread per SPE (ppu_pthread_function on a context loaded
 * with fixedgrid_spu), initializes the concentration, wind and diffusion
 * arrays, then for every time step hands work to the SPEs in three phases:
 * rows with dt/2, columns with dt, rows with dt/2 again. Work is handed
 * over by filling spe_argvs[] and calling spe_set_status(), and each phase
 * ends with wait_all_spes(). After the last step the SPEs are told to stop,
 * the threads are joined and the final concentration is written out.
 *
 * argv[1], when present, is the requested number of SPUs; it must be >= 1
 * and only takes effect when it is below the detected/capped count.
 */
int main(int argc, char** argv)
{
    /* Iterators */
    int i, j, k;
    
    /* Number of rows handed to each SPE in the row phases */
    uint32_t block;
    
    /* Time (seconds) */
    long t_0;
    long t_end;
    long dt;
    long steps;
    long iter;
    
    /* Emission control */
    bool emflag = TRUE;
    
    /* Start wall clock timer */
    timer_start(TIMER_WALLCLOCK);
    
    /* Initialize parallelization */
    nprocs = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1);
    nprocs = nprocs > MAX_THREADS ? MAX_THREADS : nprocs;
    
    /* Optional command-line override of the SPU count */
    if(argc > 1)
    {
        i = atoi(argv[1]);
        if(i < 1)
        {
            fprintf(stderr, "Invalid number of SPUs: %d < 1.\n", i);
            exit(1);
        }
        
        if(i < nprocs)
        {
            nprocs = i;
        }
        else 
        {
            /* NOTE(review): this branch is also taken when i == nprocs,
             * in which case the message is misleading. */
            printf("%d SPUs unavailable.  Using %d instead.\n", i, nprocs);
        }
    }
    
    /* Create SPE threads */
    for(i=0; i<nprocs; i++) 
    {
        /* Each thread gets the address of its own argument block */
        threads[i].argp = (void*)(&spe_argvs[i]);
        
        /* Create context */
        if((threads[i].speid = spe_context_create(0, NULL)) == NULL) 
        {
            fprintf(stderr, "Failed spe_context_create(errno=%d strerror=%s)\n", errno, strerror(errno));
            exit(1);
        }
        
        /* Load program into context */
        if(spe_program_load(threads[i].speid, &fixedgrid_spu)) 
        {
            fprintf(stderr, "Failed spe_program_load(errno=%d strerror=%s)\n", errno, strerror(errno));
            exit(1);
        }
            
        /* Create thread for each SPE context */
        /* NOTE(review): pthread_create returns its error code and does not
         * set errno, so the values printed below may be unrelated. */
        if(pthread_create(&threads[i].pthread, NULL, &ppu_pthread_function, &threads[i])) 
        {
            fprintf(stderr, "Failed pthread_create(errno=%d strerror=%s)\n", errno, strerror(errno));
            exit(1);
        }
        
        spe_set_status(i, SPE_STATUS_WAITING);
    }
    
    printf("\nRunning %d threads (%d SPU + 1 PPU).\n", (nprocs+1), nprocs);
    
    /* Allocate concentration memory */
    //conc = _malloc_align(NROWS*NCOLS*sizeof(double), 7);
    //conc_buff = (double*)_malloc_align(MAX_THREADS*NY*sizeof(double), 7);

    /* Allocation wind vector filed memory */
    //wind_u = _malloc_align(NROWS*NCOLS*sizeof(double), 7);
    //wind_v = _malloc_align(NROWS*NCOLS*sizeof(double), 7);
    //wind_u_buff = (double*)_malloc_align(MAX_THREADS*NY*sizeof(double), 7);
    //wind_v_buff = (double*)_malloc_align(MAX_THREADS*NY*sizeof(double), 7);
 
    /* Allocation diffusion tensor memory */
    //diff = _malloc_align(NROWS*NCOLS*sizeof(double), 7);
    //diff_buff = (double*)_malloc_align(MAX_THREADS*NY*sizeof(double), 7);

    /* Initialize concentration data */
    double_array_init(NROWS*NCOLS, conc, O3_INIT);
        
    /* Initialize wind field */
    double_array_init(NROWS*NCOLS, wind_u, WIND_U_INIT);
    double_array_init(NROWS*NCOLS, wind_v, WIND_V_INIT);
    
    /* Initialize diffusion field */
    double_array_init(NROWS*NCOLS, diff, DIFF_INIT);
    
    /* Initialize time */
    t_0 = 0.0;
    t_end = year2sec(END_YEAR - START_YEAR) + day2sec(END_DOY - START_DOY) + 
            hour2sec(END_HOUR - START_HOUR) + minute2sec(END_MIN - START_MIN);
    dt = STEP_SIZE;
    /* Integer division: a trailing partial step is dropped */
    steps = (long)( (t_end - t_0)/dt );
    
    /* Print startup banner */
    print_start_banner(NX*DX, NY*DY, 0.0, t_end, steps);
    
    /* Store initial concentration */
    write_conc(&(conc[0]), 0, 0);
    
    /* BEGIN CALCULATIONS */
    for(iter = 1; iter <= steps; iter++)
    {
        /* Emissions are added only while the simulated time is below 6 hours */
        emflag = iter*dt < 6*3600.0 ? TRUE : FALSE;
        
        timer_start(TIMER_ROW_DISCRET);
        
        /* Discretize rows 1/2 timestep */
        block = NROWS / nprocs;
        for(i=0; i<nprocs; i++)
        {
            /* Configure SPE arguments */
            /* Each SPE gets a contiguous band of `block` rows; the last SPE
             * also takes the NROWS % nprocs leftover rows. */
            spe_argvs[i].arg[0].u64 = (uint64_t)(&conc[i*block*NX]);
            spe_argvs[i].arg[1].u64 = (uint64_t)(&wind_u[i*block*NX]);
            spe_argvs[i].arg[2].u64 = (uint64_t)(&diff[i*block*NX]);
            /* NOTE(review): dt is a long, so dt/2 is an integer division
             * before it is stored as a double. */
            spe_argvs[i].arg[3].dbl = dt/2;
            spe_argvs[i].arg[4].dbl = DX;
            spe_argvs[i].arg[5].u32[0] = NX;
            spe_argvs[i].arg[5].u32[1] = (i == nprocs - 1 ? block + NROWS % nprocs : block);  //FIXME
            
            /* Signal SPE */
            spe_set_status(i, SPE_STATUS_WORKING);
        }
        
        /* Wait for SPEs to finish */
        wait_all_spes();
        
        timer_stop(TIMER_ROW_DISCRET);
        
        timer_start(TIMER_COL_DISCRET);
        
        /* Discretize colums 1 timestep */
        /* Columns are dealt out round-robin: column i goes to SPE i % nprocs
         * via that SPE's slice of the ccol/wcol/dcol staging buffers. */
        for(i=0; i<NCOLS; i++)
        {
            k = i % nprocs;

            while(spe_get_status(k) > 0) ; //intentional wait
            
            /* Slot k still holds the result for column i-nprocs: copy it
             * back into conc before the slot is reused. */
            if(i >= nprocs)
            {
                timer_start(TIMER_ARRAY_COPY);
                for(j=0; j<NY; j++)
                {
                    conc[i-nprocs + j*NX] = ccol[k*NY+j];
                }
                timer_stop(TIMER_ARRAY_COPY);
            }
            
            /* Gather column i (stride NX) into the contiguous staging slot */
            timer_start(TIMER_ARRAY_COPY);
            for(j=0; j<NY; j++)
            {
                ccol[k*NY + j] = conc[i + j*NX];
                wcol[k*NY + j] = wind_v[i + j*NX];
                dcol[k*NY + j] = diff[i + j*NX];
            }
            timer_stop(TIMER_ARRAY_COPY);

            // Configure SPE arguments 
            spe_argvs[k].arg[0].u64 = (uint64_t)(&ccol[k*NY]);
            spe_argvs[k].arg[1].u64 = (uint64_t)(&wcol[k*NY]);
            spe_argvs[k].arg[2].u64 = (uint64_t)(&dcol[k*NY]);
            spe_argvs[k].arg[3].dbl = dt;
            spe_argvs[k].arg[4].dbl = DY;
            spe_argvs[k].arg[5].u32[0] = NY;
            spe_argvs[k].arg[5].u32[1] = 1;

            // Signal SPE 
            spe_set_status(k, SPE_STATUS_WORKING);
        }

        /* Wait for SPEs to finish */
        /* NOTE(review): the results of the last nprocs columns are not
         * copied from ccol back into conc anywhere in this function -
         * confirm whether that is handled elsewhere. */
        wait_all_spes();
        
        timer_stop(TIMER_COL_DISCRET);
        
        timer_start(TIMER_ROW_DISCRET);
        
        /* Discretize rows 1/2 timestep */
        block = NROWS / nprocs;
        for(i=0; i<nprocs; i++)
        {
            /* Configure SPE arguments */
            spe_argvs[i].arg[0].u64 = (uint64_t)(&conc[i*block*NX]);
            spe_argvs[i].arg[1].u64 = (uint64_t)(&wind_u[i*block*NX]);
            spe_argvs[i].arg[2].u64 = (uint64_t)(&diff[i*block*NX]);
            spe_argvs[i].arg[3].dbl = dt/2;
            spe_argvs[i].arg[4].dbl = DX;
            spe_argvs[i].arg[5].u32[0] = NX;
            spe_argvs[i].arg[5].u32[1] = (i == nprocs - 1 ? block + NROWS % nprocs : block);  //FIXME
            
            /* Signal SPE */
            spe_set_status(i, SPE_STATUS_WORKING);
        }
        
        /* Wait for SPEs to finish */
        wait_all_spes();
        
        timer_stop(TIMER_ROW_DISCRET);
        
        /*
         * Could update wind field here...
         */
         
        /*
         * Could update diffusion tensor here...
         */
        
        /* Add emissions */
        if(emflag)
        {
            conc[SOURCE_Y*NX + SOURCE_X] += dt * (SOURCE_RATE) / (DX * DY * 1000.0);
        }
        
        /* Store concentration */
        #ifdef WRITE_EACH_ITER
        write_conc(conc, iter, 0);
        #endif
        
        /* Indicate progress */
        if(iter % 10 == 0)
        {
            printf("Iteration %ld of %ld.  Time = %ld seconds.\n", iter, steps, iter*dt);
        }
        
    }
    /* END CALCULATIONS */
    
    /* Wait for SPU-thread to complete execution. */
    /* The SPE contexts are not destroyed in this function. */
    for(i=0; i<nprocs; i++) 
    {
        spe_set_status(i, SPE_STATUS_STOPPED);
        if(pthread_join(threads[i].pthread, NULL)) 
        {
            perror("Failed pthread_join");
            exit(1);
        }
    }
    
    /* Store concentration */
    /* iter is steps+1 after the loop, hence iter-1 */
    write_conc(conc, iter-1, 0);
    
    /* Show final time */
    printf("Final time: %ld seconds.\n", (iter-1)*dt);
    
    timer_stop(TIMER_WALLCLOCK);

    print_timer_summary("===PPU Timers===");    
    
    /* Cleanup and exit */
    return 0;
}
示例#28
0
/*
 * Runs the spu_threads program on a user-selected number of SPUs.
 *
 * Usage: ppu_threads <n>, with 1 <= n <= the usable-SPE count reported by
 * spe_cpu_info_get().  For each of the n SPUs a context is created, the
 * spu_threads program is loaded into it, and a pthread executing
 * ppu_pthread_function is started with &data[i] as its argument.  main
 * then joins every thread and destroys the matching context.
 *
 * Returns 0 on success.  Exits with status 1 on an invalid argument or
 * when any context/thread call fails.
 */
int main(int argc, char **argv) {
   int i, retval, spus;
   int valid = 0;
   long requested = 0;
   char *end = NULL;

   /* Determine number of available SPUs */
   spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, 0);
   if (spus < 1) {
      /* Without this guard the usage text below would show a bogus range. */
      fprintf(stderr, "No usable SPEs reported (%d)\n", spus);
      exit(1);
   }

   /*
    * Parse the requested count once with strtol so that non-numeric input
    * and trailing garbage (e.g. "3abc") are rejected instead of being
    * silently accepted the way atoi would.
    */
   if (argc == 2) {
      requested = strtol(argv[1], &end, 10);
      valid = (end != argv[1]) && (*end == '\0') &&
              (requested >= 1) && (requested <= spus);
   }
   if (!valid) {
      printf("Usage: 'ppu_threads <1-%d>'\n", spus);
      exit(1);
   }
   spus = (int)requested;

   /* Create a context and thread for each SPU */
   for (i = 0; i < spus; i++) {

      /* Create context */
      if ((data[i].speid = spe_context_create(0, NULL)) == NULL)
      {
         perror("spe_context_create");
         exit(1);
      }

      /* Load program into the context */
      if ((retval =
          spe_program_load(data[i].speid, &spu_threads)) != 0)
      {
         perror("spe_program_load");
         exit (1);
      }

      /*
       * Initialize control block and thread data: the loop index itself,
       * cast to a pointer, is what gets stored as this thread's argp.
       */
      control_block = i;
      data[i].argp = (void*)control_block;

      /*
       * Create thread.  pthread_create reports failure through its return
       * value and does not set errno, so print that code rather than
       * calling perror.
       */
      if ((retval =
          pthread_create(
              &data[i].pthread,
              NULL,
              &ppu_pthread_function,
              &data[i])) != 0)
      {
         fprintf(stderr, "pthread_create: error %d\n", retval);
         exit (1);
      }
   }

   /* Wait for the threads to finish processing */
   for (i = 0; i < spus; i++)
   {
      /* pthread_join also returns its error number directly. */
      if ((retval = pthread_join(data[i].pthread, NULL)) != 0)
      {
          fprintf(stderr, "pthread_join: error %d\n", retval);
          exit (1);
      }

      if ((retval = spe_context_destroy (data[i].speid)) != 0)
      {
          perror("spe_context_destroy");
          exit (1);
      }
   }
   return 0;
}
示例#29
0
/*
 * Splits [start, end] into NUM_SPE equal sub-ranges and runs the
 * "integral_spe.elf" SPE image once per sub-range, each on its own SPE
 * context driven by a dedicated pthread (run_integral_spe).
 *
 * start, end - bounds of the whole interval.
 * delta      - forwarded unchanged to every SPE through
 *              integral_params[i].delta.
 *
 * Returns the sum of integral_params[i].sum over all SPEs, read after every
 * thread has been joined (presumably each SPE program writes its partial
 * result there; that code is outside this block).
 *
 * Any failing libspe2 or pthread call prints a diagnostic and terminates
 * the process with exit(1).
 */
float calc_integral(float start, float end, float delta)
{
    int i;
    int ret;
    float sum = 0.0f;

    spe_program_handle_t *prog;
    spe_context_ptr_t spe[NUM_SPE];
    pthread_t thread[NUM_SPE];
    thread_arg_t arg[NUM_SPE];

    prog = spe_image_open("integral_spe.elf");
    if (!prog) {
        perror("spe_image_open");
        exit(1);
    }

    /* Create one context per SPE and load the program image into it. */
    for (i = 0; i < NUM_SPE; i++) {
        spe[i] = spe_context_create(0, NULL);
        /*
         * Test the element just created.  Testing the array name `spe`
         * would always be non-NULL and a failed create would go unnoticed.
         */
        if (!spe[i]) {
            perror("spe_context_create");
            exit(1);
        }

        ret = spe_program_load(spe[i], prog);
        if (ret) {
            perror("spe_program_load");
            exit(1);
        }
    }

    /* Fill in each SPE's sub-range and start its driver thread. */
    for (i = 0; i < NUM_SPE; i++) {
        integral_params[i].start = start + (end-start)/NUM_SPE * i;
        integral_params[i].end   = start + (end-start)/NUM_SPE * (i+1);
        integral_params[i].delta = delta;
        integral_params[i].sum   = 0.0f;

        arg[i].spe = spe[i];
        arg[i].integral_params = &integral_params[i];

        /*
         * pthread_create returns its error number and does not set errno,
         * so report the returned code instead of using perror.
         */
        ret = pthread_create(&thread[i], NULL, run_integral_spe, &arg[i]);
        if (ret) {
            fprintf(stderr, "pthread_create: error %d\n", ret);
            exit(1);
        }
    }

    /* Wait for every thread, then release its context. */
    for (i = 0; i < NUM_SPE; i++) {
        ret = pthread_join(thread[i], NULL);
        if (ret) {
            /* Do not destroy a context whose thread may still be running. */
            fprintf(stderr, "pthread_join: error %d\n", ret);
            exit(1);
        }

        ret = spe_context_destroy(spe[i]);
        if (ret) {
            perror("spe_context_destroy");
            exit(1);
        }
    }

    ret = spe_image_close(prog);
    if (ret) {
        perror("spe_image_close");
        exit(1);
    }

    /* Accumulate the per-SPE partial results. */
    for (i = 0; i < NUM_SPE; i++) {
        printf("[PPE] sum = %f\n", integral_params[i].sum);
        sum += integral_params[i].sum;
    }

    return sum;
}
示例#30
0
/*
 * Multiplies Amatrix by Bmatrix on NUM_THREADS SPEs, times the run, and
 * checks the SPE result (Cmatrix) against a reference product computed on
 * the PPU (Dmatrix).
 *
 * Steps: fill A and B with 3*(tsize-i)+(tsize-j) and zero C and D; start one
 * SPE context plus pthread (spe_thread) per worker; join all workers; print
 * the elapsed wall-clock time; destroy the contexts; recompute the product
 * into Dmatrix and compare element by element.
 *
 * argc/argv are unused.
 *
 * Returns 0 when the run completes (even if the comparison reports an
 * error, as before), or 1 if an SPE context or thread cannot be set up or
 * joined.
 */
int main( int argc, char *argv[] )
{
   int i, j, k;
   int tmi, tmj;
   int rows;
   int ret;
   int error = 0;
   pthread_t         threads     [ NUM_THREADS ];
   spe_context_ptr_t spe_contexts[ NUM_THREADS ];
   thread_args_t     thread_args [ NUM_THREADS ];

   /* Parameters are intentionally unused; avoid a pointer-to-int cast. */
   (void)argc;
   (void)argv;

   // initialize initial & final matrix
   for (i = 0; i < tsize; i++)
   {
      for (j = 0; j < tsize; j++)
      {
         tmi = tsize-i;
         tmj = tsize-j;
         Amatrix[i][j] = 3*tmi+tmj ;
         Bmatrix[i][j] = 3*tmi+tmj ;
         Cmatrix[i][j] = 0 ;
         Dmatrix[i][j] = 0 ;
      }
   }

   // perform multiply
   printf( "SPE: Multiply \n");

   gettimeofday( &time0, &tzone );

   // start jobs

   // determine amount of work each spe should do
   rows = ((tsize/32)+NUM_THREADS-1)/NUM_THREADS ;
   for (i = 0; i < NUM_THREADS; i++ )
   {
      // set arguments
      args.Amat   = (float (*)[tsize][tsize])Amatrix ;
      args.Bmat   = (float (*)[tsize][tsize])Bmatrix ;
      args.Cmat   = (float (*)[tsize][tsize])Cmatrix ;
      args.i_initial = i*rows ;

      spe_contexts[i] = spe_context_create( 0, NULL ); // (flags, gang)
      if ( spe_contexts[i] == NULL )
      {
         perror( "spe_context_create" );
         return 1;
      }

      if ( spe_program_load( spe_contexts[i], &multiply_spu ) != 0 )
      {
         perror( "spe_program_load" );
         return 1;
      }

      /*
       * NOTE(review): every thread is handed the address of the same
       * `args` object, whose i_initial field is overwritten on the next
       * iteration.  Unless spe_thread/the SPE program copies it before
       * that happens, workers can observe another worker's i_initial.
       * A per-thread args array would remove the hazard; the type of
       * `args` is declared outside this block, so it is left unchanged.
       */
      thread_args[i].spe_context = spe_contexts[i];
      thread_args[i].argp = &args ;
      thread_args[i].envp = NULL;

      /* pthread_create returns its error number; errno is not set. */
      ret = pthread_create( &threads[i], NULL, &spe_thread, &thread_args[i] );
      if ( ret != 0 )
      {
         fprintf( stderr, "pthread_create: error %d\n", ret );
         return 1;
      }
   }

   // wait for tasks to complete
   for (i = 0; i < NUM_THREADS; i++)
   {
      ret = pthread_join( threads[i], NULL );
      if ( ret != 0 )
      {
         fprintf( stderr, "pthread_join: error %d\n", ret );
         return 1;
      }
   }

   gettimeofday( &time1, &tzone );

   // print time to complete
   sec  = time1.tv_sec  - time0.tv_sec ;
   usec = time1.tv_usec - time0.tv_usec ;
   if ( usec < 0 )
   {
      /* Borrow one second when the microsecond difference went negative. */
      sec--;
      usec+=1000000 ;
   }

   printf(
      "SPE: Multiply Done -- matrix[%d][%d]: time=%d.%06d\n",
      tsize,
      tsize,
      sec,
      usec);

   // destroy contexts; report a failure but keep releasing the rest
   for (i = 0; i < NUM_THREADS; i++)
   {
      if ( spe_context_destroy( spe_contexts[i] ) != 0 )
      {
         perror( "spe_context_destroy" );
      }
   }

   // Check for correctness of final matrix: build the reference product
   for (i = 0; i < tsize; i++)
   {
      for (j = 0; j < tsize; j++)
      {
         for (k = 0; k < tsize; k++)
         {
            Dmatrix[i][j] += Amatrix[i][k] * Bmatrix[k][j] ;
         }
      }
   }

   // any element that differs exactly marks the run as wrong
   for (i = 0; i < tsize; i++)
   {
      for (j = 0; j < tsize; j++)
      {
         if ( Cmatrix[i][j] != Dmatrix[i][j] )
            error = 1 ;
      }
   }

   if (error)
   {
      printf("Error in Multiply.\n");
   }
   else
   {
      printf("Multiply is correct.\n"); fflush(stdout);
   }

   return 0;
}