Exemplo n.º 1
0
/*
 * PPE-side driver for the "increment" example.
 *
 * Opens the SPE image "increment_spe.elf", creates NUM_SPE SPE contexts and
 * loads the image into each of them, runs every context on its own pthread
 * (thread entry point: run_increment_spe), waits for all threads, releases
 * the contexts and the image, and finally prints counter[0].
 *
 * Returns 0 on success. Any failing call is reported with perror() and the
 * process exits with status 1.
 */
int main(int argc, char **argv)
{
    int i;
    int ret;

    spe_context_ptr_t spe[NUM_SPE];
    spe_program_handle_t *prog;
    pthread_t thread[NUM_SPE];

    prog = spe_image_open("increment_spe.elf");
    if (!prog) {
        perror("spe_image_open");
        exit(1);
    }

    /* Create one context per SPE and load the same program into each. */
    for (i = 0; i < NUM_SPE; i++) {
        spe[i] = spe_context_create(0, NULL);
        /*
         * Check the context that was just created. Testing the array name
         * (`!spe`) is always false and would let a NULL context slip through
         * to spe_program_load().
         */
        if (!spe[i]) {
            perror("spe_context_create");
            exit(1);
        }

        ret = spe_program_load(spe[i], prog);
        if (ret) {
            perror("spe_program_load");
            exit(1);
        }
    }

    /* Each thread receives a pointer to its own context slot. */
    for (i = 0; i < NUM_SPE; i++) {
        ret = pthread_create(&thread[i], NULL, run_increment_spe, &spe[i]);
        if (ret) {
            perror("pthread_create");
            exit(1);
        }
    }

    /* A context is destroyed only after the thread running it has finished. */
    for (i = 0; i < NUM_SPE; i++) {
        pthread_join(thread[i], NULL);
        ret = spe_context_destroy(spe[i]);
        if (ret < 0) {
            perror("spe_context_destroy");
            exit(1);
        }
    }

    ret = spe_image_close(prog);
    if (ret) {
        perror("spe_image_close");
        exit(1);
    }

    printf("result=%d\n", counter[0]);

    return 0;
}
Exemplo n.º 2
0
/*
 * PPE-side driver for the "print_param" example.
 *
 * Opens "print_param_spe.elf", loads it into a single SPE context and runs it
 * synchronously, passing the integer `param` to the SPE program through the
 * argp argument of spe_context_run(). The context and image are released
 * afterwards.
 *
 * Returns 0 on success. Any failing libspe2 call is reported with perror()
 * and the process exits with status 1.
 */
int main(int argc, char **argv)
{
    int ret;

    spe_context_ptr_t spe;
    spe_program_handle_t *prog;
    unsigned int entry;
    spe_stop_info_t stop_info;

    unsigned long param;

    prog = spe_image_open("print_param_spe.elf");
    if (!prog) {
        perror("spe_image_open");
        exit(1);
    }

    spe = spe_context_create(0, NULL);
    if (!spe) {
        perror("spe_context_create");
        exit(1);
    }

    ret = spe_program_load(spe, prog);
    if (ret) {
        perror("spe_program_load");
        exit(1);
    }

    param = 12345678;
    /* param is unsigned long, so it must be printed with %lu (not %ld). */
    printf("[PPE] param=%lu\n", param);

    entry = SPE_DEFAULT_ENTRY;
    /* The value itself (not an address) travels in the pointer-sized argp slot. */
    ret = spe_context_run(spe, &entry, 0, (void *) param, NULL, &stop_info);
    if (ret < 0) {
        perror("spe_context_run");
        exit(1);
    }

    ret = spe_context_destroy(spe);
    if (ret) {
        perror("spe_context_destroy");
        exit(1);
    }

    ret = spe_image_close(prog);
    if (ret) {
        perror("spe_image_close");
        exit(1);
    }

    return 0;
}
Exemplo n.º 3
0
/*
 * Start the SPE dispatcher threads.
 *
 * Opens the dispatcher image, sets speThreads to the number of usable SPEs
 * clamped to both MAX_SPU_NUM and `numspes`, creates one SPE thread per slot
 * with CreateSPEThread(), and writes two mailbox words to each SPE: its id
 * and its seed (currently the id as well).
 *
 * numspes: upper bound on the number of SPEs to use.
 *
 * Returns 0 on success, or -1 if the dispatcher image cannot be opened; in
 * that case speThreads is left untouched and no thread is created.
 */
initDisp( unsigned int numspes )
{
  unsigned int i;

  // Get dispatcher first: without an image there is nothing to run, and a
  // NULL handle must not be handed to CreateSPEThread().
  spe_program_handle_t *dispatcher = spe_image_open( "./../../../../numpycbe_dispatcher" );
  if ( !dispatcher )
    {
      return -1;
    }

  // Get the number of available SPEs
  speThreads = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1);
  // Clamp to the defined number of SPEs used
  if ( speThreads > MAX_SPU_NUM )
    {
      speThreads = MAX_SPU_NUM;
    }
  // Clamp to the number of SPEs requested by the caller
  if( speThreads > numspes )
    {
      speThreads = numspes;
    }

  // Initialize threads
  for( i = 0 ; i < speThreads ; i++ )
    {
      CreateSPEThread( &speData[i], dispatcher, &spe_pointer_addr[i] );

      // Sending the SPE its id
      spe_in_mbox_write ( speData[i].spe_ctx, &i, 1, SPE_MBOX_ALL_BLOCKING );
      // Sending the SPE its seed. This should be something like time instead of id?
      spe_in_mbox_write ( speData[i].spe_ctx, &i, 1, SPE_MBOX_ALL_BLOCKING );
    }

  return 0;
}
Exemplo n.º 4
0
/*
 * Write an in-memory SPE binary to a temporary file and open it as an SPE
 * image.
 *
 * The `length` bytes at `binary` are copied into "/tmp/opencl.bin.1.0"
 * through a shared file mapping, the file is given owner rwx permissions,
 * and the result of spe_image_open() on that file is stored in the
 * file-level variable spe_program.
 *
 * length: size of the binary in bytes.
 * binary: pointer to the binary image.
 *
 * Returns 1 when the image was opened, 0 on any failure. The temporary
 * mapping and file descriptor are released on every path, including success.
 */
static int LoadBinary(size_t length, const unsigned char * binary)
{
  char filename[512];
  int loaded = 0;

  snprintf(filename, sizeof filename, "/tmp/opencl.bin.%lu.%u", (long unsigned int) 1, 0u);

  TouchFile(filename, length);
  int fd = open(filename, O_RDWR);
  if(fd < 0)
    return 0;

  void * data = mmap(NULL, length, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);

  if(data == MAP_FAILED)
    {
      close(fd);
      return 0;
    }

  memcpy(data, binary, length);

  /* the file must have execution privileges. set execution for user */
  if(chmod(filename, S_IRWXU) != 0)
    goto UnMapFile;

  spe_program = spe_image_open(filename);
  if(spe_program == NULL)
    {
      if(errno == EACCES) printf("EACCES\n");
      else if(errno == EFAULT) printf("EFAULT\n");
      else perror(NULL);
      goto UnMapFile;
    }

  /* we have a good binary */
  loaded = 1;

  /*
   * Fall through on success as well: this mapping and descriptor were only
   * needed to write the file (spe_image_open was given the file name, not
   * this mapping), so keeping them would leak on every call.
   */
UnMapFile:
  munmap(data, length);
  close(fd);
  return loaded;
}
Exemplo n.º 5
0
/*
 * PPE-side driver for the "abs" example.
 *
 * Usage: <program> <spu_image>
 *
 * Opens the SPE image named by argv[1], loads it into one SPE context and
 * runs it synchronously with a pointer to abs_params (addresses of the `in`
 * and `out` buffers plus SIZE) as its argument. After the context and image
 * have been released, the SIZE elements of `out` are printed four per line.
 *
 * Returns 0 on success and -1 when no image name is given. Any failing
 * libspe2 call is reported with perror() and the process exits with status 1.
 */
int main(int argc, char **argv)
{
    int i;
    int ret;

    spe_context_ptr_t spe;
    spe_program_handle_t *prog;
    unsigned int entry;
    spe_stop_info_t stop_info;

    if (argc == 1) {
        fprintf(stderr, "usage: %s <spu_image>\n", argv[0]);
        return -1;
    }

    prog = spe_image_open(argv[1]);
    if (!prog) {
        perror("spe_image_open");
        exit(1);
    }

    spe = spe_context_create(0, NULL);
    if (!spe) {
        perror("spe_context_create");
        exit(1);
    }

    ret = spe_program_load(spe, prog);
    if (ret) {
        perror("spe_program_load");
        exit(1);
    }

    /* Effective addresses are handed to the SPE as integers. */
    abs_params.ea_in  = (unsigned long) in;
    abs_params.ea_out = (unsigned long) out;
    abs_params.size   = SIZE;

    entry = SPE_DEFAULT_ENTRY;
    ret = spe_context_run(spe, &entry, 0, &abs_params, NULL, &stop_info);
    if (ret < 0) {
        perror("spe_context_run");
        exit(1);
    }

    ret = spe_context_destroy(spe);
    if (ret) {
        perror("spe_context_destroy");
        exit(1);
    }

    ret = spe_image_close(prog);
    if (ret) {
        perror("spe_image_close");
        exit(1);
    }

    for (i = 0; i < SIZE; i++) {
        /*
         * The format has a single %f conversion, so only the value may be
         * passed; passing the int index first made printf read `i` as the
         * floating-point argument.
         */
        printf("%5.0f ", out[i]);
        if ((i+1) % 4 == 0) printf("\n");
    }

    return 0;
}
Exemplo n.º 6
0
///Builds the physics scene for the CCD demo.
///Creates the collision shapes, the collision configuration and dispatcher (serial or
///parallel), the broadphase, the constraint solver and the discrete dynamics world, then
///populates the world with rigid bodies. Which dispatcher, solver and scene layout are used
///is decided entirely by compile-time macros (USE_PARALLEL_DISPATCHER, USE_WIN32_THREADING,
///USE_LIBSPE2, USE_PARALLEL_SOLVER, DO_WALL, DO_BENCHMARK_PYRAMIDS, ...).
void	CcdPhysicsDemo::initPhysics()
{
	setTexturing(true);
	setShadows(false);

#ifdef USE_PARALLEL_DISPATCHER
#ifdef _WIN32
	m_threadSupportSolver = 0;
	m_threadSupportCollision = 0;
#endif //
#endif

	//m_collisionShapes[0] is the ground shape: a static plane or a large flat box
//#define USE_GROUND_PLANE 1
#ifdef USE_GROUND_PLANE
	m_collisionShapes.push_back(new btStaticPlaneShape(btVector3(0,1,0),0.5));
#else

	///Please don't make the box sizes larger then 1000: the collision detection will be inaccurate.
	///See http://www.continuousphysics.com/Bullet/phpBB2/viewtopic.php?t=346
	m_collisionShapes.push_back(new btBoxShape (btVector3(200,CUBE_HALF_EXTENTS,200)));
#endif

	//m_collisionShapes[1] is the shape used for the dynamic objects: box for the benchmark, cylinder otherwise
#ifdef DO_BENCHMARK_PYRAMIDS
	m_collisionShapes.push_back(new btBoxShape (btVector3(CUBE_HALF_EXTENTS,CUBE_HALF_EXTENTS,CUBE_HALF_EXTENTS)));
#else
//	m_collisionShapes.push_back(new btBoxShape (btVector3(CUBE_HALF_EXTENTS,CUBE_HALF_EXTENTS,CUBE_HALF_EXTENTS)));
	m_collisionShapes.push_back(new btCylinderShape (btVector3(CUBE_HALF_EXTENTS,CUBE_HALF_EXTENTS,CUBE_HALF_EXTENTS)));
#endif



#ifdef DO_BENCHMARK_PYRAMIDS
	setCameraDistance(32.5f);
#endif

#ifdef DO_BENCHMARK_PYRAMIDS
	m_azi = 90.f;
#endif //DO_BENCHMARK_PYRAMIDS

	m_dispatcher=0;
	m_collisionConfiguration = new btDefaultCollisionConfiguration();

#ifdef USE_PARALLEL_DISPATCHER
int maxNumOutstandingTasks = 4;

#ifdef USE_WIN32_THREADING

	m_threadSupportCollision = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo(
								"collision",
								processCollisionTask,
								createCollisionLocalStoreMemory,
								maxNumOutstandingTasks));
#else

#ifdef USE_LIBSPE2

   spe_program_handle_t * program_handle;
#ifndef USE_CESOF
                        //load the SPU collision program from an ELF file in the working directory
                        //NOTE(review): on failure this only reports the error; program_handle stays NULL and is still passed on below
                        program_handle = spe_image_open ("./spuCollision.elf");
                        if (program_handle == NULL)
                    {
                                perror( "SPU OPEN IMAGE ERROR\n");
                    }
                        else
                        {
                                printf( "IMAGE OPENED\n");
                        }
#else
                        //CESOF build: the SPU program is linked in as the external symbol spu_program
                        extern spe_program_handle_t spu_program;
                        program_handle = &spu_program;
#endif
        //NOTE(review): this is a local variable; the dispatcher below is given the member m_threadSupportCollision,
        //which is not assigned in this branch - confirm whether the member was intended here
        SpuLibspe2Support* threadSupportCollision  = new SpuLibspe2Support( program_handle, maxNumOutstandingTasks);
#endif //USE_LIBSPE2

///Playstation 3 SPU (SPURS)  version is available through PS3 Devnet
/// For Unix/Mac someone could implement a pthreads version of btThreadSupportInterface?
///you can hook it up to your custom task scheduler by deriving from btThreadSupportInterface
#endif


	m_dispatcher = new	SpuGatheringCollisionDispatcher(m_threadSupportCollision,maxNumOutstandingTasks,m_collisionConfiguration);
//	m_dispatcher = new	btCollisionDispatcher(m_collisionConfiguration);
#else

	m_dispatcher = new	btCollisionDispatcher(m_collisionConfiguration);
#endif //USE_PARALLEL_DISPATCHER

#ifdef USE_CUSTOM_NEAR_CALLBACK
	//this is optional
	m_dispatcher->setNearCallback(customNearCallback);
#endif


	m_broadphase = new btDbvtBroadphase();

#ifdef COMPARE_WITH_QUICKSTEP
	m_solver = new btOdeQuickstepConstraintSolver();
#else


#ifdef USE_PARALLEL_SOLVER

	//NOTE(review): maxNumOutstandingTasks is only declared under USE_PARALLEL_DISPATCHER, and Win32ThreadSupport is used
	//here without a USE_WIN32_THREADING guard - confirm the supported macro combinations
	m_threadSupportSolver = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo(
								"solver",
								processSolverTask,
								createSolverLocalStoreMemory,
								maxNumOutstandingTasks));

	m_solver = new btParallelSequentialImpulseSolver(m_threadSupportSolver,maxNumOutstandingTasks);
#else
	btSequentialImpulseConstraintSolver* solver = new btSequentialImpulseConstraintSolver();

	m_solver = solver;//new btOdeQuickstepConstraintSolver();

#endif //USE_PARALLEL_SOLVER

#endif



		btDiscreteDynamicsWorld* world = new btDiscreteDynamicsWorld(m_dispatcher,m_broadphase,m_solver,m_collisionConfiguration);
		m_dynamicsWorld = world;

		///SOLVER_RANDMIZE_ORDER makes cylinder stacking a bit more stable
		world->getSolverInfo().m_solverMode |= SOLVER_RANDMIZE_ORDER;

#ifdef	USER_DEFINED_FRICTION_MODEL
	//user defined friction model is not supported in 'cache friendly' solver yet, so switch to old solver

		world->getSolverInfo().m_solverMode = SOLVER_RANDMIZE_ORDER;

#endif //USER_DEFINED_FRICTION_MODEL

#ifdef DO_BENCHMARK_PYRAMIDS
		world->getSolverInfo().m_numIterations = 4;
#endif //DO_BENCHMARK_PYRAMIDS

		m_dynamicsWorld->getDispatchInfo().m_enableSPU = true;
		m_dynamicsWorld->setGravity(btVector3(0,-10,0));



#ifdef USER_DEFINED_FRICTION_MODEL
	{
		//NOTE(review): 'solver' is only declared in the non-parallel, non-quickstep solver branch above
		//m_solver->setContactSolverFunc(ContactSolverFunc func,USER_CONTACT_SOLVER_TYPE1,DEFAULT_CONTACT_SOLVER_TYPE);
		solver->SetFrictionSolverFunc(myFrictionModel,USER_CONTACT_SOLVER_TYPE1,DEFAULT_CONTACT_SOLVER_TYPE);
		solver->SetFrictionSolverFunc(myFrictionModel,DEFAULT_CONTACT_SOLVER_TYPE,USER_CONTACT_SOLVER_TYPE1);
		solver->SetFrictionSolverFunc(myFrictionModel,USER_CONTACT_SOLVER_TYPE1,USER_CONTACT_SOLVER_TYPE1);
		//m_physicsEnvironmentPtr->setNumIterations(2);
	}
#endif //USER_DEFINED_FRICTION_MODEL



	int i;

	btTransform tr;
	tr.setIdentity();


	//object 0 uses m_collisionShapes[0] (the ground); every other object uses m_collisionShapes[1]
	for (i=0;i<gNumObjects;i++)
	{
		if (i>0)
		{
			shapeIndex[i] = 1;//sphere
		}
		else
			shapeIndex[i] = 0;
	}

	if (useCompound)
	{
		//replace m_collisionShapes[1] by a compound shape that wraps the previous shape
		btCompoundShape* compoundShape = new btCompoundShape();
		btCollisionShape* oldShape = m_collisionShapes[1];
		m_collisionShapes[1] = compoundShape;
		btVector3 sphereOffset(0,0,2);

		comOffset.setIdentity();

#ifdef CENTER_OF_MASS_SHIFT
		comOffset.setOrigin(comOffsetVec);
		compoundShape->addChildShape(comOffset,oldShape);

#else
		//the old shape at the identity transform plus an extra sphere of radius 0.9 offset along z
		compoundShape->addChildShape(tr,oldShape);
		tr.setOrigin(sphereOffset);
		compoundShape->addChildShape(tr,new btSphereShape(0.9));
#endif
	}

#ifdef DO_WALL

	for (i=0;i<gNumObjects;i++)
	{
		btCollisionShape* shape = m_collisionShapes[shapeIndex[i]];
		shape->setMargin(gCollisionMargin);

		//object 0 is the static ground (mass 0), all others are dynamic
		bool isDyna = i>0;

		btTransform trans;
		trans.setIdentity();

		if (i>0)
		{
			//stack them
			int colsize = 10;
			int row = (i*CUBE_HALF_EXTENTS*2)/(colsize*2*CUBE_HALF_EXTENTS);
			int row2 = row;
			int col = (i)%(colsize)-colsize/2;


			if (col>3)
			{
				col=11;
				row2 |=1;
			}

			//odd values of row2 shift the object by half a cube along x
			btVector3 pos(col*2*CUBE_HALF_EXTENTS + (row2%2)*CUBE_HALF_EXTENTS,
				row*2*CUBE_HALF_EXTENTS+CUBE_HALF_EXTENTS+EXTRA_HEIGHT,0);

			trans.setOrigin(pos);
		} else
		{
			trans.setOrigin(btVector3(0,EXTRA_HEIGHT-CUBE_HALF_EXTENTS,0));
		}

		float mass = 1.f;

		if (!isDyna)
			mass = 0.f;

		btRigidBody* body = localCreateRigidBody(mass,trans,shape);
#ifdef USE_KINEMATIC_GROUND
		if (mass == 0.f)
		{
			body->setCollisionFlags( body->getCollisionFlags() | btCollisionObject::CF_KINEMATIC_OBJECT);
			body->setActivationState(DISABLE_DEACTIVATION);
		}
#endif //USE_KINEMATIC_GROUND


		// Only do CCD if  motion in one timestep (1.f/60.f) exceeds CUBE_HALF_EXTENTS
		body->setCcdMotionThreshold( CUBE_HALF_EXTENTS );

		//Experimental: better estimation of CCD Time of Impact:
		body->setCcdSweptSphereRadius( 0.2*CUBE_HALF_EXTENTS );

#ifdef USER_DEFINED_FRICTION_MODEL
		///Advanced use: override the friction solver
		body->m_frictionSolverType = USER_CONTACT_SOLVER_TYPE1;
#endif //USER_DEFINED_FRICTION_MODEL

	}
#endif


#ifdef DO_BENCHMARK_PYRAMIDS
	btTransform trans;
	trans.setIdentity();

	btScalar halfExtents = CUBE_HALF_EXTENTS;

	trans.setOrigin(btVector3(0,-halfExtents,0));



	//static ground body (mass 0)
	localCreateRigidBody(0.f,trans,m_collisionShapes[shapeIndex[0]]);

	int numWalls = 15;
	int wallHeight = 15;
	float wallDistance = 3;


	//one stack per wall, spaced wallDistance apart along z and centred around z=0
	for (int i=0;i<numWalls;i++)
	{
		float zPos = (i-numWalls/2) * wallDistance;
		createStack(m_collisionShapes[shapeIndex[1]],halfExtents,wallHeight,zPos);
	}
//	createStack(m_collisionShapes[shapeIndex[1]],halfExtends,20,10);

//	createStack(m_collisionShapes[shapeIndex[1]],halfExtends,20,20);
#define DESTROYER_BALL 1
#ifdef DESTROYER_BALL
	//a heavy sphere (mass 10000, radius 2) starting at z=40 and moving along -z
	btTransform sphereTrans;
	sphereTrans.setIdentity();
	sphereTrans.setOrigin(btVector3(0,2,40));
	btSphereShape* ball = new btSphereShape(2.f);
	m_collisionShapes.push_back(ball);
	btRigidBody* ballBody = localCreateRigidBody(10000.f,sphereTrans,ball);
	ballBody->setLinearVelocity(btVector3(0,0,-10));
#endif
#endif //DO_BENCHMARK_PYRAMIDS
//	clientResetScene();


}
Exemplo n.º 7
0
/*
 * Integrate over [start, end] by splitting the interval evenly across
 * NUM_SPE SPEs.
 *
 * Opens "integral_spe.elf", loads it into NUM_SPE contexts, fills
 * integral_params[i] with the i-th sub-interval and `delta`, and runs each
 * context on its own pthread (thread entry point: run_integral_spe). After
 * all threads have been joined, the partial sums found in
 * integral_params[i].sum are printed and added up.
 *
 * start: lower bound of the interval.
 * end:   upper bound of the interval.
 * delta: step value forwarded unchanged to every SPE.
 *
 * Returns the sum of all partial results. Any failing libspe2/pthread call
 * is reported with perror() and the process exits with status 1.
 */
float calc_integral(float start, float end, float delta)
{
    int i;
    int ret;
    float sum = 0.0f;

    spe_program_handle_t *prog;
    spe_context_ptr_t spe[NUM_SPE];
    pthread_t thread[NUM_SPE];
    thread_arg_t arg[NUM_SPE];

    prog = spe_image_open("integral_spe.elf");
    if (!prog) {
        perror("spe_image_open");
        exit(1);
    }

    /* Create one context per SPE and load the same program into each. */
    for (i = 0; i < NUM_SPE; i++) {
        spe[i] = spe_context_create(0, NULL);
        /*
         * Check the context that was just created. Testing the array name
         * (`!spe`) is always false and would let a NULL context slip through
         * to spe_program_load().
         */
        if (!spe[i]) {
            perror("spe_context_create");
            exit(1);
        }

        ret = spe_program_load(spe[i], prog);
        if (ret) {
            perror("spe_program_load");
            exit(1);
        }
    }

    for (i = 0; i < NUM_SPE; i++) {
        /* Sub-interval i of NUM_SPE equal slices of [start, end]. */
        integral_params[i].start = start + (end-start)/NUM_SPE * i;
        integral_params[i].end   = start + (end-start)/NUM_SPE * (i+1);
        integral_params[i].delta = delta;
        integral_params[i].sum   = 0.0f;

        arg[i].spe = spe[i];
        arg[i].integral_params = &integral_params[i];

        ret = pthread_create(&thread[i], NULL, run_integral_spe, &arg[i]);
        if (ret) {
            perror("pthread_create");
            exit(1);
        }
    }

    /* A context is destroyed only after the thread running it has finished. */
    for (i = 0; i < NUM_SPE; i++) {
        pthread_join(thread[i], NULL);
        ret = spe_context_destroy(spe[i]);
        if (ret) {
            perror("spe_context_destroy");
            exit(1);
        }
    }

    ret = spe_image_close(prog);
    if (ret) {
        perror("spe_image_close");
        exit(1);
    }

    for (i = 0; i < NUM_SPE; i++) {
        printf("[PPE] sum = %f\n", integral_params[i].sum);
        sum += integral_params[i].sum;
    }

    return sum;
}
Exemplo n.º 8
0
///Builds the physics scene for the basic demo.
///Creates the collision configuration, the collision dispatcher (serial, or SPU/Win32
///parallel when USE_PARALLEL_DISPATCHER is defined), the broadphase, the sequential impulse
///solver and the discrete dynamics world, then adds a static ground body and gNumObjects
///dynamic spheres that all share one sphere shape.
///If the SPU image cannot be opened the process prints an error and exits with status 1.
void	BasicDemo::initPhysics()
{

	//A single configuration is shared by the dispatcher and the world and kept in the member.
	//Allocating a second one for the member left this one unreferenced and gave dispatcher
	//and world different configurations.
	btDefaultCollisionConfiguration* collisionConfiguration = new btDefaultCollisionConfiguration();
	m_collisionConfiguration = collisionConfiguration;

#ifdef USE_PARALLEL_DISPATCHER

	int maxNumOutstandingTasks = 1;//number of maximum outstanding tasks
#ifdef USE_WIN32_THREADING

	Win32ThreadSupport* threadSupport = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo(
								"collision",
								processCollisionTask,
								createCollisionLocalStoreMemory,
								maxNumOutstandingTasks));
#else

	spe_program_handle_t * program_handle;
#ifndef USE_CESOF
	//load the SPU collision program from an ELF file, path relative to the working directory
	const char* spuFileName = "../../../src/BulletMultiThreaded/out/spuCollision.elf";

	program_handle = spe_image_open (spuFileName);
	if (program_handle == NULL)
	{
		printf( "SPU OPEN IMAGE ERROR:%s\n",spuFileName);
		//nothing can run without the SPU program: report failure through the exit status
		exit(1);
	}
	else
	{
		printf( "IMAGE OPENED:%s\n",spuFileName);
	}
#else
	//CESOF build: the SPU program is linked in as the external symbol spu_program
	extern spe_program_handle_t spu_program;
	program_handle = &spu_program;
#endif
	SpuLibspe2Support* threadSupport = new SpuLibspe2Support( program_handle, maxNumOutstandingTasks);

#endif // WIN32


	m_dispatcher = new	SpuGatheringCollisionDispatcher(threadSupport,maxNumOutstandingTasks,collisionConfiguration);
#else
	m_dispatcher = new	btCollisionDispatcher(collisionConfiguration);
#endif //USE_PARALLEL_DISPATCHER


#define USE_SWEEP_AND_PRUNE 1
#ifdef USE_SWEEP_AND_PRUNE
#define maxProxies 8192
	btVector3 worldAabbMin(-10000,-10000,-10000);
	btVector3 worldAabbMax(10000,10000,10000);
	m_overlappingPairCache = new btAxisSweep3(worldAabbMin,worldAabbMax,maxProxies);
	//m_overlappingPairCache = new btMultiSapBroadphase();


#else
	m_overlappingPairCache = new btSimpleBroadphase;
#endif //USE_SWEEP_AND_PRUNE



	btSequentialImpulseConstraintSolver* sol = new btSequentialImpulseConstraintSolver;
	m_solver = sol;


	m_dynamicsWorld = new btDiscreteDynamicsWorld(m_dispatcher,m_overlappingPairCache,m_solver,m_collisionConfiguration);
	m_dynamicsWorld->getDispatchInfo().m_enableSPU = true;

	m_dynamicsWorld->setGravity(btVector3(0,-10,0));


	///create a few basic rigid bodies


	//static ground
#ifdef USE_GROUND_BOX
	btCollisionShape* groundShape = new btBoxShape(btVector3(btScalar(50.),btScalar(50.),btScalar(50.)));
#else
	btCollisionShape* groundShape = new btSphereShape(btScalar(50.));
#endif//USE_GROUND_BOX

	m_collisionShapes.push_back(groundShape);

	//mass 0 makes the ground body static; it is centred 50 units below the origin
	btTransform groundTransform;
	groundTransform.setIdentity();
	groundTransform.setOrigin(btVector3(0,-50,0));
	localCreateRigidBody(btScalar(0.),groundTransform,groundShape);

	//create a few dynamic sphere rigidbodies (re-using the same sphere shape)
	//btCollisionShape* sphereShape = new btBoxShape(btVector3(1,1,1));
	btCollisionShape* sphereShape = new btSphereShape(btScalar(1.));
	m_collisionShapes.push_back(sphereShape);

	int i;
	for (i=0;i<gNumObjects;i++)
	{

		sphereShape->setMargin(gCollisionMargin);
		btTransform trans;
		trans.setIdentity();
		//stack them: colsize objects per row, odd rows shifted by HALF_EXTENTS along x
		int colsize = 2;
		int row = (int)((i*HALF_EXTENTS*2)/(colsize*2*HALF_EXTENTS));
		int row2 = row;
		int col = (i)%(colsize)-colsize/2;
		btVector3 pos(col*2*HALF_EXTENTS + (row2%2)*HALF_EXTENTS,
			row*2*HALF_EXTENTS+HALF_EXTENTS,0);

		trans.setOrigin(pos);
		//btRigidBody* body = localCreateRigidBody(btScalar(1.),trans,sphereShape);
		localCreateRigidBody(btScalar(1.),trans,sphereShape);
	}

	//clientResetScene();
}
Exemplo n.º 9
0
/*
 * Create a program object from a pre-built SPE binary.
 *
 * Only the first entry of `lengths`/`binaries` is used. binaries[0] is
 * treated as the path of an SPE ELF image, lengths[0] bytes long; it is
 * copied into a NUL-terminated buffer owned by the program and opened with
 * spe_image_open().
 *
 * context:       context the program belongs to; must not be NULL.
 * num_devices:   number of entries in device_list; must be at least 1.
 * device_list:   devices for the program. The pointer is stored as-is (not
 *                copied), so it has to outlive the program.
 * lengths:       lengths[0] is the byte length of binaries[0]. The pointer
 *                is stored as-is (not copied).
 * binaries:      binaries[0] is the image path.
 * binary_status: not written by this implementation.
 * errcode_ret:   optional. When non-NULL it receives CL_SUCCESS,
 *                CL_INVALID_CONTEXT, CL_INVALID_VALUE, CL_OUT_OF_HOST_MEMORY
 *                or CL_INVALID_BINARY.
 *
 * Returns the new program (reference count 1), or (cl_program)0 on failure,
 * in which case everything allocated here has been released again.
 */
extern
cl_program clCreateProgramWithBinary (cl_context context,
                                      cl_uint num_devices,
                                      const cl_device_id *device_list,
                                      const size_t *lengths,
                                      const char **binaries,
                                      cl_int *binary_status,
                                      cl_int *errcode_ret)
{
  cl_int ignored_status;
  cl_program program = NULL;
  char *name = NULL;

  /* errcode_ret may legally be NULL: route every status write to a dummy
     so that the error paths below never dereference a NULL pointer. */
  if(errcode_ret == NULL)
    errcode_ret = &ignored_status;

  if(context == NULL)
    {
      *errcode_ret = CL_INVALID_CONTEXT;
      return (cl_program)0;
    }

  if(num_devices < 1 || device_list == NULL)
    {
      *errcode_ret = CL_INVALID_VALUE;
      return (cl_program)0;
    }

  if(lengths == NULL || binaries == NULL
     || *lengths == 0 || binaries[0] == NULL)
    {
      *errcode_ret = CL_INVALID_VALUE;
      return (cl_program)0;
    }

  PRINT_DEBUG("\n====\tCreating program  \t====\n");

  program = malloc(sizeof(struct _cl_program));
  if(program == NULL)
    {
      *errcode_ret = CL_OUT_OF_HOST_MEMORY;
      return (cl_program)0;
    }

  program->program_ref_count = 1;
  program->program_context = context;
  program->program_num_devices = num_devices;

  //Should memcpy() these
  program->program_devices = device_list;

  PRINT_DEBUG("Set devices\n");

  program->program_source = NULL;
  program->program_binary_sizes = lengths;

  PRINT_DEBUG("Before malloc\n");

  program->program_binaries = malloc(sizeof(char *));
  if(program->program_binaries == NULL)
    {
      *errcode_ret = CL_OUT_OF_HOST_MEMORY;
      goto fail_program;
    }

  PRINT_DEBUG("After first malloc\n");

  name = malloc((*lengths)+1);
  if(name == NULL)
    {
      *errcode_ret = CL_OUT_OF_HOST_MEMORY;
      goto fail_binaries;
    }

  PRINT_DEBUG("After second malloc\n");

  /* Copy exactly *lengths bytes and terminate explicitly. strcpy() would
     write past the (*lengths)+1 byte buffer whenever the source string is
     longer than the advertised length. */
  memcpy(name, binaries[0], *lengths);
  name[(*lengths)] = '\0';
  *(program->program_binaries) = name;

  PRINT_DEBUG("After copy\n");

  PRINT_DEBUG("Opening spe image %s\n", name);

  program->program_elfs = spe_image_open(name);

  if (!program->program_elfs) {
    PRINT_DEBUG("Could not open spe image\n");
    *errcode_ret = CL_INVALID_BINARY;
    goto fail_name;
  }

  PRINT_DEBUG("\n====\tReturning program  \t====\n");

  *errcode_ret = CL_SUCCESS;
  return program;

  /* Error unwinding: release in reverse order of allocation. */
fail_name:
  free(name);
fail_binaries:
  free(program->program_binaries);
fail_program:
  free(program);
  return (cl_program)0;

}
Exemplo n.º 10
0
/*
 * Run one kernel command on the Cell SPU device.
 *
 * Steps, in order:
 *  1. If "<tmp_dir>/parallel.so" does not exist yet, build it: link the
 *     kernel bitcode with pocl-llvm-ld, lower it to assembly with LLC, and
 *     compile that assembly together with spe_wrap.c using spu-gcc.
 *  2. Open the resulting SPU image and load it into spe_context.
 *  3. Fill a __kernel_exec_cmd with the work dimensions, group counts,
 *     global offsets and the kernel arguments.
 *  4. Write a __kernel_metadata block and the command itself into SPU
 *     memory with cellspu_memwrite().
 *  5. Run the SPU program with spe_context_run() and free the local
 *     argument allocations afterwards.
 *
 * data: device data; must not be NULL (asserted). It is cast to struct data *.
 * cmd:  command node; the kernel, the temporary directory and the pocl
 *       context are read from cmd->command.run.
 *
 * Failures of the external build steps are caught by assert() only.
 * Failures of spe_program_load() and spe_context_run() are reported with
 * perror() and execution continues.
 */
void
pocl_cellspu_run 
(void *data, 
 _cl_command_node* cmd)
{
  struct data *d;
  int error;
  char bytecode[POCL_FILENAME_LENGTH];
  char assembly[POCL_FILENAME_LENGTH];
  char module[POCL_FILENAME_LENGTH];
  char command[COMMAND_LENGTH];
  char workgroup_string[WORKGROUP_STRING_LENGTH];
  unsigned device;
  struct pocl_argument *al;
  size_t x, y, z;
  unsigned i;
  pocl_workgroup w;
  char* tmpdir = cmd->command.run.tmp_dir;
  cl_kernel kernel = cmd->command.run.kernel;
  struct pocl_context *pc = &cmd->command.run.pc;
  const char* kern_func = kernel->function_name;
  unsigned int entry = SPE_DEFAULT_ENTRY;

  assert (data != NULL);
  d = (struct data *) data;

  // Path of the SPU executable built for this kernel.
  error = snprintf 
    (module, POCL_FILENAME_LENGTH,
     "%s/parallel.so", tmpdir);
  assert (error >= 0);

  // This is the entry to the kernel. We currently hard-code it
  // into the SPU binary. Resulting in only one entry-point per 
  // SPU image.
  // TODO: figure out which function to call given what conditions
  snprintf (workgroup_string, WORKGROUP_STRING_LENGTH,
            "_%s_workgroup_fast", kernel->function_name);


  // Build the SPU executable only when it is not already present in tmpdir.
  if ( access (module, F_OK) != 0)
    {
      char *llvm_ld;
      error = snprintf (bytecode, POCL_FILENAME_LENGTH,
                        "%s/linked.bc", tmpdir);
      assert (error >= 0);
      
      // Pick the linker: build tree, installed libexec directory, or PATH.
      if (getenv("POCL_BUILDING") != NULL)
        llvm_ld = BUILDDIR "/tools/llvm-ld/pocl-llvm-ld";
      else if (access(PKGLIBEXECDIR "/pocl-llvm-ld", X_OK) == 0)
        llvm_ld = PKGLIBEXECDIR "/pocl-llvm-ld";
      else
        llvm_ld = "pocl-llvm-ld";

      error = snprintf (command, COMMAND_LENGTH,
			"%s --disable-opt -link-as-library -o %s %s/%s",
                        llvm_ld, bytecode, tmpdir, POCL_PARALLEL_BC_FILENAME);
      assert (error >= 0);
      
      error = system(command);
      assert (error == 0);
      
      error = snprintf (assembly, POCL_FILENAME_LENGTH,
			"%s/parallel.s",
			tmpdir);
      assert (error >= 0);
      
      // "-relocation-model=dynamic-no-pic" is a magic string,
      // I do not know why it has to be there to produce valid
      // sos on x86_64
      error = snprintf (command, COMMAND_LENGTH,
			LLC " " HOST_LLC_FLAGS " -o %s %s",
			assembly,
			bytecode);
      assert (error >= 0);
      error = system (command);
      assert (error == 0);
           

      // Compile the assembly version of the OCL kernel with the
      // C wrapper to get a spulet
      error = snprintf (command, COMMAND_LENGTH,
			"spu-gcc lib/CL/devices/cellspu/spe_wrap.c -o %s %s "
			" -Xlinker --defsym -Xlinker _ocl_buffer=%d"
			" -Xlinker --defsym -Xlinker kernel_command=%d"
			" -I . -D_KERNEL=%s -std=c99",
			module,
			assembly, 
			CELLSPU_OCL_BUFFERS_START,
			CELLSPU_KERNEL_CMD_ADDR,
			workgroup_string);
      assert (error >= 0);
#ifdef DEBUG_CELLSPU_DRIVER
      printf("compiling: %s\n", command); fflush(stdout); 
#endif
      error = system (command);
      assert (error == 0);

    }
      
    // Load the SPU with the newly generated binary
    // NOTE(review): the result of spe_image_open is not checked before it is
    // passed to spe_program_load - confirm that a NULL handle is acceptable there.
    hello_spu = spe_image_open( (const char*)module );
    if( spe_program_load( spe_context, hello_spu) )
        perror("spe_program_load fails");
    
//
//  /* Find which device number within the context correspond
//     to current device.  */
//  for (i = 0; i < kernel->context->num_devices; ++i)
//    {
//      if (kernel->context->devices[i]->data == data)
//	{
//	  device = i;
//	  break;
//	}
//    }
//

  // This structure gets passed to the device.
  // It contains all the info needed to run a kernel  
  __kernel_exec_cmd dev_cmd;
  dev_cmd.work_dim = cmd->command.run.pc.work_dim;
  dev_cmd.num_groups[0] = cmd->command.run.pc.num_groups[0];
  dev_cmd.num_groups[1] = cmd->command.run.pc.num_groups[1];
  dev_cmd.num_groups[2] = cmd->command.run.pc.num_groups[2];

  dev_cmd.global_offset[0] = cmd->command.run.pc.global_offset[0];
  dev_cmd.global_offset[1] = cmd->command.run.pc.global_offset[1];
  dev_cmd.global_offset[2] = cmd->command.run.pc.global_offset[2];


  // the code below is lifted from pthreads :) 
  // 'arguments' aliases dev_cmd.args, so both names write the same array.
  uint32_t *arguments = dev_cmd.args;

  for (i = 0; i < kernel->num_args; ++i)
    {
      al = &(kernel->dyn_arguments[i]);
      if (kernel->arg_is_local[i])
        {
          // Local argument: reserve al->size bytes and pass the chunk's start address.
          chunk_info_t* local_chunk = cellspu_malloc_local (d, al->size);
          if (local_chunk == NULL)
            POCL_ABORT ("Could not allocate memory for a local argument. Out of local mem?\n");

          dev_cmd.args[i] = local_chunk->start_address;

        }
      else if (kernel->arg_is_pointer[i])
        {
          /* It's legal to pass a NULL pointer to clSetKernelArguments. In 
             that case we must pass the same NULL forward to the kernel.
             Otherwise, the user must have created a buffer with per device
             pointers stored in the cl_mem. */
          if (al->value == NULL)
            arguments[i] = (uint32_t)NULL;
          else
            arguments[i] = \
              ((chunk_info_t*)((*(cl_mem *)\
                (al->value))->device_ptrs[0]))->start_address;
		//TODO: '0' above is the device number... don't hard-code!
        }
      else if (kernel->arg_is_image[i])
        {
          POCL_ABORT_UNIMPLEMENTED();
//          dev_image2d_t di;      
//          cl_mem mem = *(cl_mem*)al->value;
//          di.data = &((*(cl_mem *) (al->value))->device_ptrs[device]);
//          di.data = ((*(cl_mem *) (al->value))->device_ptrs[device]);
//          di.width = mem->image_width;
//          di.height = mem->image_height;
//          di.rowpitch = mem->image_row_pitch;
//          di.order = mem->image_channel_order;
//          di.data_type = mem->image_channel_data_type;
//          void* devptr = pocl_cellspu_malloc(data, 0, sizeof(dev_image2d_t), NULL);
//          arguments[i] = malloc (sizeof (void *));
//          *(void **)(arguments[i]) = devptr; 
//          pocl_cellspu_write (data, &di, devptr, sizeof(dev_image2d_t));
        }
      else if (kernel->arg_is_sampler[i])
        {
          POCL_ABORT_UNIMPLEMENTED();
//          dev_sampler_t ds;
//          
//          arguments[i] = malloc (sizeof (void *));
//          *(void **)(arguments[i]) = pocl_cellspu_malloc(data, 0, sizeof(dev_sampler_t), NULL);
//          pocl_cellspu_write (data, &ds, *(void**)arguments[i], sizeof(dev_sampler_t));
        }
      else
        {
          // Any other argument: the value pointer itself is narrowed to 32 bits and passed on.
          arguments[i] = (uint32_t)al->value;
        }
    }

  // allocate memory for kernel local variables
  // NOTE(review): a host pointer returned by malloc is stored in a 32-bit slot
  // here and dereferenced again below - confirm this only runs where host
  // pointers fit in 32 bits.
  for (i = kernel->num_args;
       i < kernel->num_args + kernel->num_locals;
       ++i)
    {
      al = &(kernel->dyn_arguments[i]);
      arguments[i] = (uint32_t)malloc (sizeof (void *));
      *(void **)(arguments[i]) = cellspu_malloc_local(data, al->size);
    }

  // the main loop on the spe needs an auxiliary struct for to get the 
  // number of arguments and such. 
  __kernel_metadata kmd;
  // NOTE(review): strncpy leaves kmd.name unterminated when workgroup_string
  // is at least sizeof(kmd.name) characters long.
  strncpy( kmd.name, workgroup_string, sizeof( kmd.name ) );  
  kmd.num_args = kernel->num_args;
  kmd.num_locals = kernel->num_locals;
  // TODO: fill in the rest, if used by the spu main function.

  // TODO malloc_local should be given the 'device data'. as long as the 
  // spu context is global this is ok.
  // NOTE(review): the result of cellspu_malloc_local is dereferenced without a NULL check.
  void *chunk = cellspu_malloc_local( NULL, sizeof(__kernel_metadata) ); 
  void *kernel_area = ((chunk_info_t*)chunk)->start_address;
  cellspu_memwrite( kernel_area, &kmd, sizeof(__kernel_metadata) );
  dev_cmd.kernel = kernel_area;
  
  // finish up the command, send it to SPE
  dev_cmd.status =POCL_KST_READY;
  cellspu_memwrite( (void*)CELLSPU_KERNEL_CMD_ADDR, &dev_cmd, sizeof(__kernel_exec_cmd) );
       
  // Execute code on SPU. This starts with the main() in the spu - see spe_wrap.c
  if (spe_context_run(spe_context,&entry,0,NULL,NULL,NULL) < 0)
    perror("context_run error");

//  for (z = 0; z < pc->num_groups[2]; ++z)
//    {
//      for (y = 0; y < pc->num_groups[1]; ++y)
//        {
//          for (x = 0; x < pc->num_groups[0]; ++x)
//            {
//              pc->group_id[0] = x;
//              pc->group_id[1] = y;
//              pc->group_id[2] = z;
//
//              w (arguments, pc);
//
//            }
//        }
//    }


  // Clean-up ? 
  // NOTE(review): for local arguments, arguments[i] holds a chunk start
  // address (see the first loop), yet it is dereferenced here as a host
  // pointer - verify against pocl_cellspu_free's expectations.
  for (i = 0; i < kernel->num_args; ++i)
    {
      if (kernel->arg_is_local[i])
        pocl_cellspu_free(data, 0, *(void **)(arguments[i]));
    }
  // NOTE(review): the small host blocks obtained with malloc in the locals
  // loop above are not freed here.
  for (i = kernel->num_args;
       i < kernel->num_args + kernel->num_locals;
       ++i)
    pocl_cellspu_free(data, 0, *(void **)(arguments[i]));
}