Ejemplo n.º 1
0
void cluster_t::update_projection_gpu(){
#ifdef USE_GPU
      //
      // 1) Run kernel to copy U_project into U_project_prev
      // 2) Run kernel to compute sub_fnorms and store in 
      // buffer_subject_variable_block_norms.
      // 3) Fetch sub_fnorm from GPU
      //
  int x_dim = BLOCK_WIDTH * variable_blocks;
  runKernel("store_U_project_prev",kernel_store_U_project_prev,x_dim,n,1,BLOCK_WIDTH,1,1);
  bool debug_gpu1 = false;
  if(debug_gpu1){
    float testArr[n*p];
    readFromBuffer(buffer_U_project_prev,n*p,testArr,"buffer_U_project_prev");
    for(int i=0;i<n;++i){
      for(int j=0;j<p;++j){
        if(i>(n-2) && j>(p-10)){
          cerr<<"GPU: U_project_prev: "<<i<<","<<j<<": "<<testArr[i*p+j]<<endl;
        }
      }
    }
    for(int i=0;i<n;++i){
      for(int j=0;j<p;++j){
        if(i>(n-2) && j>(p-10)){
          cerr<<"CPU: U_project_prev: "<<i<<","<<j<<": "<<U_project_prev[i*p+j]<<endl;
        }
      }
    }
  }
  // for debugging only
  //writeToBuffer(buffer_V_project_coeff,triangle_dim,V_project_coeff,"buffer_V_project_coeff");
  //writeToBuffer(buffer_U_project_orig,n*p,U_project_orig,"buffer_U_project_orig");
  runKernel("iterate_projection",kernel_iterate_projection,x_dim,n,1,BLOCK_WIDTH,1,1);
  readFromBuffer(buffer_subject_variable_block_norms,n*variable_blocks,sub_fnorm,"buffer_subject_variable_block_norms");
  bool debug_gpu = false;
  if (debug_gpu){
    cerr<<"GPU iterate projection:\n";
    float test_U[n*p];
    readFromBuffer(buffer_U_project,n*p,test_U,"buffer_U_project");
        for(int i=n-5;i<n;++i){
          cerr<<i<<":";
          for(int j=0;j<p;++j){
            cerr<<" "<<test_U[i*p+j];
          }
          cerr<<endl;
        }


    for(int i=(0);i<n;++i){
      cerr<<i<<":";
      for(int j=0;j<variable_blocks;++j){
        cerr<<" "<<sub_fnorm[i*variable_blocks+j];
      }
      cerr<<endl;
    }
  }
#endif
}
Ejemplo n.º 2
0
void OCLSample::timeKernel(cl::Kernel kernel, double& elapsed,
                           double& average) {
  cl::Event* events = new cl::Event[numIterations_];
  cl_int result;

  elapsed = 0.0;

  for(unsigned i = 0; i < numIterations_; ++i) {
    cl_ulong start, end;
    runKernel(kernel, &events[i]);

    queue_.flush();
    events[i].wait();

    result = events[i].getProfilingInfo<cl_ulong>(CL_PROFILING_COMMAND_START,
                                                  &start);
    assert(result == CL_SUCCESS && "Unable to get profiling information");
    result = events[i].getProfilingInfo<cl_ulong>(CL_PROFILING_COMMAND_END,
                                                  &end);
    assert(result == CL_SUCCESS && "Unable to get profiling information");
    elapsed += (double)1e-9 * (end - start);
  }

  average = elapsed / (double)numIterations_;

  delete [] events;
}
static CALresult runNuStep(MWCALInfo* ci,
                           SeparationCALMem* cm,
                           const IntegralArea* ia,
                           const SeparationCALChunks* chunks,
                           CALint pollingMode,
                           CALuint nuStep)
{
    CALdomain domain;
    CALuint i;
    CALresult err = CAL_RESULT_OK;

    err = setNuKernelArgs(cm, ia, nuStep);
    if (err != CAL_RESULT_OK)
        return err;

    domain.x = 0;
    domain.width = ia->r_steps;

    for (i = 0; i < chunks->nChunkMu && err == CAL_RESULT_OK; ++i)
    {
        domain.y = chunks->chunkMuBorders[i];
        domain.height = chunks->chunkMuBorders[i + 1] - chunks->chunkMuBorders[i];

        mw_begin_critical_section();
        err = runKernel(ci, &domain, pollingMode, chunks->chunkWaitTime);
        mw_end_critical_section();
    }

    return err;
}
Ejemplo n.º 4
0
void runKernel(cl_runtime_env env,
               std::string kernel_name,
               double* vars,
               double* out,
               int start_index,
               int out_len)

{
    kernel kern;
    for (int i = 1; i < env.num_kerns; i++)
    {
        if (env.kernels[i].name == kernel_name)
            kern = env.kernels[i];
    }
    runKernel(env.cv, env.cl_kernels[kernel_name],
              kern,
              env.gpu_data,
              vars);
    cl_int err;

    err = clEnqueueReadBuffer(env.cv.commands, env.gpu_data["out"].array,
                              true,
                              start_index,
                              sizeof(double)*out_len,
                              out,
                              0, NULL, NULL);
    CHK_ERR(err);

    err = clFlush(env.cv.commands);
    CHK_ERR(err);
}
Ejemplo n.º 5
0
void cluster_t::evaluate_obj_gpu(){
#ifdef USE_GPU
  int x_dim = BLOCK_WIDTH * n;
  runKernel("evaluate_obj",kernel_evaluate_obj,x_dim,n,1,BLOCK_WIDTH,1,1);
  readFromBuffer(buffer_n_norms,n,norm1_arr,"buffer_n_norms");
  readFromBuffer(buffer_n2_norms,triangle_dim,norm2_arr,"buffer_n_norms2");
  bool debug_gpu =  false;
  if(debug_gpu){
    cerr<<"GPU NORM1:";
    for(int i=0;i<n;++i){
      cerr<<" "<<norm1_arr[i];
    }
    cerr<<endl;
    for(int i1=0;i1<n-1;++i1){
      cerr<<"GPU NORM2["<<i1<<"]:";
      for(int i2=i1+1;i2<n;++i2){
        if (i2>i1){
          cerr<<" "<<norm2_arr[offsets[i1]+i2-i1];
        }
      }
      cerr<<endl;
    }
  }
#endif
}
Ejemplo n.º 6
0
void AppManager::render(){
    double dt = 1e-45f;//timer.elapsedAndRestart();
    runKernel(dt);
    
    glViewport(0, 0, window_width*2, window_height*2);
    visualize->use();
    
    float rx = (1.0f/(float)Nx);
    float ry = (1.0f/(float)Ny);
    
    glUniform1f(visualize->getUniform("rx"), rx);
    glUniform1f(visualize->getUniform("ry"), ry);
    
    glUniform1i(visualize->getUniform("QTex"), 0);
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, kernel0->getTexture());
    
    glBindVertexArray(vao);
    glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_BYTE, NULL);
    glBindVertexArray(0);
    
    visualize->disuse();
    
    CHECK_GL_ERRORS();
}
Ejemplo n.º 7
0
int main(int argc, char ** argv)
{
  long lower, upper;
  int WGS;

  if (argc != 4) {
    printf("not 2 arguments\n");
    return 1;
  }
  sscanf(argv[1], "%ld", &lower);
  sscanf(argv[2], "%ld", &upper);
  sscanf(argv[3], "%d", &WGS);

  long results_size = (upper*(upper-1))/2;

  long* results = (long *) malloc(sizeof(long)*WGS);
  int i;
  for(i = 0; i < WGS; i ++) results[i] = 0L;

  printf("%ld\n", results_size);
  FILE *fp;
  char *KernelSource;
  cl_kernel kernel;
  fp = fopen("totient_kernel.cl", "r");
  if (!fp) {
    fprintf(stderr, "Failed to load kernel.\n");
    exit(1);
  }
  KernelSource = (char*)malloc(MAX_SOURCE_SIZE);
  fread( KernelSource, 1, MAX_SOURCE_SIZE, fp);
  fclose( fp );
  
  size_t local[1];
  size_t global[1];
  local[0] = WGS;
  global[0] = results_size;
  
  initGPU();

  // Fill in here:
  kernel = setupKernel( KernelSource, "totient", 2,
                                  LongArr, WGS, results,
                                  IntConst, WGS);

  // Fill in here:
  runKernel( kernel, 1, global, local);

  long tot = 0;
  int l;
  for(l = 0; l < WGS; l ++) tot += results[l];
  
  printf("C: Sum of Totients between [%ld..%ld] is %ld\n",
    lower, upper, tot);
  return 0;
}
Ejemplo n.º 8
0
void cluster_t::finalize_iteration_gpu(){
#ifdef USE_GPU
  int x_dim = BLOCK_WIDTH * n;
  runKernel("get_U_norm_diff",kernel_get_U_norm_diff,x_dim,1,1,BLOCK_WIDTH,1,1);
  readFromBuffer(buffer_n_norms,n,norm1_arr,"buffer_n_norms");
  float gpu_U_norm_diff = 0;
  for(int i=0;i<n;++i){
    gpu_U_norm_diff+=norm1_arr[i];
  }
  U_norm_diff = sqrt(gpu_U_norm_diff);

  if(config->verbose)cerr<<"FINALIZE_ITERATION: GPU U_norm_diff: "<<U_norm_diff<<endl;
#endif
}
Ejemplo n.º 9
0
void GLWidget::drawPoints()
{
    runKernel();
    
    glEnable(GL_VERTEX_PROGRAM_POINT_SIZE);
    glEnable(GL_POINT_SPRITE);
    glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE);
    glDisable(GL_DEPTH_TEST);
    glEnable(GL_BLEND);
    glBlendFunc(GL_SRC_ALPHA, GL_ONE);
    glDepthMask(GL_FALSE);
    
    particleShaderProgram->bind();

    particleShaderProgram->enableAttributeArray(particleVertexLocation);
    particleShaderProgram->enableAttributeArray(particleColorLocation);

    particlesVBO->bind();
    int tupleSize = 4;
    particleShaderProgram->setAttributeBuffer(particleVertexLocation, GL_FLOAT, 0, tupleSize, sizeof(Particle));
    particleShaderProgram->setAttributeBuffer(particleColorLocation, GL_FLOAT, tupleSize*sizeof(float), tupleSize, sizeof(Particle));

    particleShaderProgram->setUniformValue(particleMatrixLocation, pMatrix * vMatrix);

    particleShaderProgram->setUniformValue(particleSamplerPSLocation, 0);
    particleShaderProgram->setUniformValue(particleSamplerVelLocation, 1);
    
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, psTexture);
    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_2D, velTexture);

    glDrawArrays(GL_POINTS, 0, vertexNumber);

    particleShaderProgram->disableAttributeArray(particleVertexLocation);
    particleShaderProgram->disableAttributeArray(particleColorLocation);
    
    particlesVBO->release();

    particleShaderProgram->release();

    glDisable(GL_POINT_SPRITE);
    glDepthMask(GL_TRUE);
    glEnable(GL_DEPTH_TEST);
    
}
Ejemplo n.º 10
0
void cluster_t::store_U_projection_gpu(){
#ifdef USE_GPU
  int x_dim = BLOCK_WIDTH * variable_blocks;
  runKernel("store_U_project",kernel_store_U_project,x_dim,n,1,BLOCK_WIDTH,1,1);
  bool debug_gpu = false;
  if(debug_gpu){
    float testArr[n*p];
    readFromBuffer(buffer_U_project,n*p,testArr,"buffer_U_project");
    for(int i=0;i<n;++i){
      for(int j=0;j<p;++j){
        if(i>(n-3) && j>(p-3)){
          cerr<<"GPU store U_project for subject,var: "<<i<<","<<j<<": "<<testArr[i*p+j]<<endl;
        }
      }
    }
  }
#endif
}
Ejemplo n.º 11
0
void cluster_t::initialize_gpu(){
#ifdef USE_GPU
  int x_dim = BLOCK_WIDTH * variable_blocks;
  runKernel("init_U",kernel_init_U,x_dim,n,1,BLOCK_WIDTH,1,1);
  bool debug_gpu = false;
  if(debug_gpu){
    float testArr[n*p];
    readFromBuffer(buffer_U_project,n*p,testArr,"buffer_U_project");
    for(int i=0;i<n;++i){
      for(int j=0;j<p;++j){
        if(i==(n-10) && j>(p-10)){
          cerr<<"GPU: U_project_orig "<<i<<","<<j<<": "<<testArr[i*p+j]<<endl;
        }
      }
    }
  }
#endif
}
Ejemplo n.º 12
0
void cluster_t::init_v_project_coeff_gpu(){
#ifdef USE_GPU
  float unweighted_lambda = mu * dist_func / rho;
  writeToBuffer(buffer_unweighted_lambda, 1, &unweighted_lambda, "buffer_unweighted_lambda");
  runKernel("init_v_project_coeff",kernel_init_v_project_coeff,BLOCK_WIDTH*n,n,1,BLOCK_WIDTH,1,1);
  bool debug_gpu = false;
  if(debug_gpu){
    float * testv = new float[triangle_dim];
    readFromBuffer(buffer_V_project_coeff,triangle_dim,testv,"buffer_V_project_coeff");
    for(int index1=0;index1<n-1;++index1){
      for(int index2=index1+1;index2<n;++index2){
        float & scaler  = testv[offsets[index1]+(index2-index1)];
        if (scaler !=0 && scaler !=1 )
          cerr<<"GPU Init_V Index: "<<index1<<","<<index2<<": "<<scaler<<endl;
      }
    }
  }
#endif
}
Ejemplo n.º 13
0
void cluster_t::update_u_gpu(){
#ifdef USE_GPU
  writeToBuffer(buffer_dist_func,1,&dist_func,"buffer_dist_func");
  writeToBuffer(buffer_rho,1,&rho,"buffer_rho");
  int x_dim = BLOCK_WIDTH * variable_blocks;
  runKernel("update_U",kernel_update_U,x_dim,n,1,BLOCK_WIDTH,1,1);
  bool debug_gpu = false;
  if(debug_gpu){
    float testArr[n*p];
    readFromBuffer(buffer_U,n*p,testArr,"buffer_U");
    for(int i=0;i<n;++i){
      for(int j=0;j<p;++j){
        if(i==(n-10) && j>(p-10)){
          cerr<<"update U GPU: U: "<<i<<","<<j<<": "<<testArr[i*p+j]<<endl;
        }
      }
    }
  }
#endif
}
Ejemplo n.º 14
0
void cluster_t::update_map_distance_gpu(){
#ifdef USE_GPU
  int x_dim = BLOCK_WIDTH * variable_blocks;
  runKernel("update_map_distance",kernel_update_map_distance,x_dim,1,1,BLOCK_WIDTH,1,1);
  float norm1_arr[variable_blocks];
  float norm2_arr[variable_blocks];
  readFromBuffer(buffer_variable_block_norms1,variable_blocks,norm1_arr,"buffer_variable_block_norms1");
  readFromBuffer(buffer_variable_block_norms2,variable_blocks,norm2_arr,"buffer_variable_block_norms2");
  float norm1=0,norm2=0;
  for(int i=0;i<variable_blocks;++i){
    //cerr<<"GPU Block "<<i<<" norm: "<<norm1_arr[i]<<","<<norm2_arr[i]<<endl;
    norm1+=norm1_arr[i];
    norm2+=norm2_arr[i];
  }
  if(config->verbose) cerr<<"GPU Norm1 was "<<norm1<<" and norm2 was "<<norm2<<endl;
  float norm = norm1+norm2;
  this->map_distance = norm;
  this->dist_func = sqrt(this->map_distance+epsilon);
  if(config->verbose)cerr<<"GET_MAP_DISTANCE: New map distance is "<<norm<<" with U distance="<<norm1<<", V distance="<<norm2<<" dist_func: "<<dist_func<<endl;
#endif
}
Ejemplo n.º 15
0
Color* Engine::getPixels() {
	if(!ocl->isDone())
		runKernel();

	return colors;
}
Ejemplo n.º 16
0
void Engine::dataChanged() {
	runKernel();
}
Ejemplo n.º 17
0
int main()
{
	int width = 512;
	int height = 512;

	// Creation du device
	cutilSafeCall( cudaSetDevice( cutGetMaxGflopsDeviceId() ) );

	// Creation des buffers sur CPU
	int * a = new int[width*height];
	int * b = new int[width*height];
	int * res = new int[width*height];
	for(int i = 0; i < width*height; i++)
	{
		a[i] = (int)ceil(((double)rand()/ (double)RAND_MAX)*100);
		b[i] = (int)ceil(((double)rand()/ (double)RAND_MAX)*100);
	}

	// Allocation des objects on the device
	// *** data
	unsigned int size = width * height * sizeof(int);
	std::cout << "Allocation d'un buffer de taille " << width*height << "\n";
	int* d_a = NULL;
	int* d_b = NULL;
	int* d_res = NULL;
    cutilSafeCall( cudaMalloc( (void**) &d_a, size));
	cutilSafeCall( cudaMalloc( (void**) &d_b, size));
	cutilSafeCall( cudaMalloc( (void**) &d_res, size));

	// Copy des donnees
	cutilSafeCall( cudaMemcpy(d_a, a, size, cudaMemcpyHostToDevice));
	cutilSafeCall( cudaMemcpy(d_b, b, size, cudaMemcpyHostToDevice));

    // Lancer le calcul
	std::cout << "Lancer le kernel ... \n";
	runKernel(d_a, d_b, d_res, width*height);
	cutilSafeCall( cutilDeviceSynchronize() );

	// Copie DeviceHost
	cutilSafeCall( cudaMemcpy(res, d_res, size, cudaMemcpyDeviceToHost));

	// Verification du test
	int i = 0;
	for(;i < width*height;i++)
		if(res[i] != a[i] + b[i])
			std::cout << "Error : [" << i << "] " << res[i] << " != " << a[i] << " + " << b[i] << std::endl;


	// Liberation des ressources
	// *** Device
	cutilSafeCall(cudaFree(d_a));
	cutilSafeCall(cudaFree(d_b));
	cutilSafeCall(cudaFree(d_res));
	// *** CPU
	delete[] res;
	delete[] a;
	delete[] b;

	// Close device
	cutilDeviceReset();
	std::cout << "Test result : " << ((i == width*height) ? "Succes" : "Error" ) <<  std::endl;
	std::cout.flush();

	return 0;
}
Ejemplo n.º 18
0
int main()
{
	int width = 800;
	int height = 600;

	int mesh_width = 256;
	int mesh_height = 256;

	// Creation du device
	cutilSafeCall( cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() ) );

	// Creation d'une fenetre
	C3::Window OpenGLWin;
	OpenGLWin.Create(C3::WindowMode(width,height),"CudaC3");
	
	// Glew init
	GLenum err = glewInit();
	if(err != GLEW_OK)
		std::cout << "Error on GLEW initialization.\n";


	// Configuration OpenGL
	glClearColor(0.0, 0.0, 0.0, 1.0);
	glDisable(GL_DEPTH_TEST);
	glViewport(0, 0, width, height);
	glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    gluPerspective(60.0, (GLfloat)width / (GLfloat) height, 0.1, 10.0);

	// VBO
	// *** Create
	GLuint vbo;
	glGenBuffers(1, &vbo);
	glBindBuffer(GL_ARRAY_BUFFER, vbo);
	// *** Initialize
	unsigned int size = mesh_width * mesh_height * 4 * sizeof(float);
	glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
	glBindBuffer(GL_ARRAY_BUFFER, 0);
	// *** Register in CUDA
	cudaGraphicsResource *cuda_vbo_resource = NULL;
	cutilSafeCall(cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, vbo, cudaGraphicsMapFlagsWriteDiscard));


	float g_fAnim = 0.f;
	int nbFrame = 0;
	float timeFPS = 0.f;
	while(OpenGLWin.IsOpened())
	{
		// Events
		C3::Event event;
		while(OpenGLWin.PoolEvent(event))
		{
                        //std::cout << "Event !" << std::endl;
			if(event.Type == C3::Event::Closed)
			{
				std::cout << "Close ... " << std::endl;
				OpenGLWin.Close();
			}
			else if(event.Type == C3::Event::KeyPressed)
			{
				if(event.Key.Code == C3::Key::Escape)
				{
					std::cout << "Close ... " << std::endl;
					OpenGLWin.Close();
				}
			}
		}

		// Mise a jour du temps
		g_fAnim += OpenGLWin.GetFrameTime() / 1000.f;
		timeFPS += OpenGLWin.GetFrameTime() / 1000.f;
		nbFrame++;
		if(timeFPS > 1.0f)
		{
			std::stringstream ss;
			ss << "CudaC3 [" << (int)ceil( nbFrame / timeFPS ) << " FPS]";
			OpenGLWin.SetTitle(ss.str());
			timeFPS = 0.f;
			nbFrame = 0;
		}
		
		// Draw the scene
		glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );

		// Lancer le calcul CUDA
		// *** map OpenGL buffer object for writing from CUDA
		float4 *dptr;
		cutilSafeCall(cudaGraphicsMapResources(1, &cuda_vbo_resource, 0));
		size_t num_bytes; 
		cutilSafeCall(cudaGraphicsResourceGetMappedPointer((void **)&dptr, &num_bytes, cuda_vbo_resource));
		// *** Run kernel
		runKernel(dptr, mesh_width, mesh_height,g_fAnim);
		cutilSafeCall( cutilDeviceSynchronize() );
		// *** Unmap
		cutilSafeCall(cudaGraphicsUnmapResources(1, &cuda_vbo_resource, 0));
		
		// OpenGL
		// *** Make some transformation
		glMatrixMode(GL_MODELVIEW);
		glLoadIdentity();
		glTranslatef(0.0, 0.0, -3.0);
		glRotatef(0.0, 1.0, 0.0, 0.0);
		glRotatef(0.0, 0.0, 1.0, 0.0);
		// *** Render VBO
		// --- Bind
		glBindBuffer(GL_ARRAY_BUFFER, vbo);
		glVertexPointer(4, GL_FLOAT, 0, 0);
		// --- Draw
		glEnableClientState(GL_VERTEX_ARRAY);
		glColor3f(1.0, 0.0, 0.0);
		glDrawArrays(GL_POINTS, 0, mesh_width * mesh_height);
		glDisableClientState(GL_VERTEX_ARRAY);

		// Swap buffers
		OpenGLWin.Display();
	}

	// Liberation des ressources
	cudaGraphicsUnregisterResource(cuda_vbo_resource);
	
	glBindBuffer(1, vbo);
	glDeleteBuffers(1, &vbo);

	// Close device
	cutilDeviceReset();

	return 0;
}
Ejemplo n.º 19
0
static void mdlOutputs(SimStruct *S, int_T tid)
{
  //printf("mdlOutputs at %g\n", ssGetT(S));
  rtsys = (RTsys*) ssGetUserData(S);

  if (rtsys->init_phase) {
    /* Failure during initialization */
    return;
  }

  real_T *y = ssGetOutputPortRealSignal(S,0);
  real_T *n = ssGetOutputPortRealSignal(S,1);
  real_T *s = ssGetOutputPortRealSignal(S,2);
  real_T *m = ssGetOutputPortRealSignal(S,3);
  real_T *energyConsumption = ssGetOutputPortRealSignal(S,4);
  int i, j, k, detected;
  double dTime; 
 
  DataNode *dn;
  Task* task;
  UserTask* t;
  InterruptHandler* hdl;
  Monitor *mon;

  if (!rtsys->started && ssGetT(S) == 0.0) {
    rtsys->started = true;
    return;
  }

  if (!rtsys->mdlzerocalled) {
    printf("Zero crossing detection must be turned on in order to run TrueTime!\n");
    ssSetErrorStatus(S, "Zero crossing detection must be turned on in order to run TrueTime!");
    return;
  }
  
  /* Storing the time */

  rtsys->time = ssGetT(S) * rtsys->clockDrift + rtsys->clockOffset;
  
  detected = 0;



  /* Check interrupts */
  
  i = 0;
  dn = (DataNode*) rtsys->triggerList->getFirst();
  while (dn != NULL) {
    if (fabs(rtsys->interruptinputs[i]-rtsys->oldinterruptinputs[i]) > 0.1) {
      hdl = (InterruptHandler*) dn->data;
      Trigger* trig = hdl->trigger;
      if (rtsys->time - trig->prevHit > trig->latency) { 
	// Trigger interrupt handler
	if (hdl->myList == rtsys->readyQ) {
	  // handler serving older interrupts
	  hdl->pending++;
	} else {
	  hdl->moveToList(rtsys->readyQ);
	  detected = 1;
	}
 	trig->prevHit = rtsys->time;
      } else { 
	//printf("Call to interrupt handler %s ignored at time %f. Within interrupt latency!\n", hdl->name, rtsys->time);
      }
      rtsys->oldinterruptinputs[i] = rtsys->interruptinputs[i];
    }
    i++;
    dn = (DataNode*) dn->getNext();
  }
  
  /* Check network */
  
  dn = (DataNode*) rtsys->networkList->getFirst();
  while (dn != NULL) {
    hdl = (InterruptHandler*) dn->data;
    Network* network = hdl->network;
    i = network->networkID - 1;
    //printf("mdlOutputs: checking network #%d inp: %d oldinp: %d\n",i,rtsys->networkinputs[i],rtsys->oldnetworkinputs[i]);
    if (fabs(rtsys->networkinputs[i] - rtsys->oldnetworkinputs[i]) > 0.1) {
      hdl->moveToList(rtsys->readyQ);
      detected = 1;
      rtsys->oldnetworkinputs[i] = rtsys->networkinputs[i];
    }
    dn = (DataNode*) dn->getNext();
  }
  
  /* Run kernel? */

  double externTime =  (rtsys->time- rtsys->clockOffset) / rtsys->clockDrift;
  if ((externTime >= rtsys->nextHit) || (detected > 0)) {
    dTime = runKernel(ssGetT(S));
    if (rtsys->error) {
      // Something went wrong executing a code function
      mxArray *bn[1];
      mexCallMATLAB(1, bn, 0, 0, "gcs"); // get current system
      char buf[200];
      mxGetString(bn[0], buf, 200);
      for (unsigned int i=0; i<strlen(buf); i++) if (buf[i]=='\n') buf[i]=' '; 
      printf("In block ==> '%s'\nSimulation aborted!\n", buf); 
      ssSetStopRequested(S, 1);
    } else {
      rtsys->nextHit = (rtsys->time + dTime - rtsys->clockOffset) / rtsys->clockDrift;
    }
  }

  
  /* Outputs */

  for (i=0; i<rtsys->nbrOfOutputs; i++) {
    y[i] = rtsys->outputs[i];
  }
  
  
  /* Network send */

  for (i=0; i<rtsys->nbrOfNetworks; i++) {
    n[i] = rtsys->nwSnd[i];
  }
  
  /* Task schedule */
  
  i = 0;
  j = 0;
  dn = (DataNode*) rtsys->taskList->getFirst();
  while (dn != NULL) {
    t = (UserTask*) dn->data;
    rtsys->taskSched[i] = (double) (j+1);
    if (t->display) j++;
    dn = (DataNode*) dn->getNext();
    i++;
  }
  
  task = (Task*) rtsys->readyQ->getFirst();
  while (task != NULL) {
    if (task->isUserTask()) {
      t = (UserTask*) task;
      rtsys->taskSched[t->taskID - 1] += 0.25;
    }
    task = (Task*) task->getNext();
  }
  
  if ((rtsys->running != NULL) && (rtsys->running->isUserTask())) {
    t = (UserTask*) rtsys->running;
    rtsys->taskSched[t->taskID - 1] += 0.25;
  }    

  i = 0;
  j = 0;
  dn = (DataNode*) rtsys->taskList->getFirst();
  while (dn != NULL) {
    t = (UserTask*) dn->data;
    if (t->display) {
      s[j] = rtsys->taskSched[i];
      j++;
    }
    dn = (DataNode*) dn->getNext();
    i++;
  }
  

  /* Handler schedule */
  
  i = 0;
  j = 0;
  dn = (DataNode*) rtsys->handlerList->getFirst();
  while (dn != NULL) {
    rtsys->handlerSched[i] = (double) (j+rtsys->nbrOfSchedTasks+2);
    if (i==0 && rtsys->contextSwitchTime > EPS) {
      // Context switch schedule, move graph down to task level
      rtsys->handlerSched[i] = rtsys->handlerSched[i] - 1;
    }
    hdl = (InterruptHandler*) dn->data;
    if (hdl->display) j++;
    dn = (DataNode*) dn->getNext();
    i++;
  }

  task = (Task*) rtsys->readyQ->getFirst();
  while (task != NULL) {
    if (!(task->isUserTask())) {
      hdl = (InterruptHandler*) task;
      rtsys->handlerSched[hdl->handlerID - 1] += 0.25;
    }
    task = (Task*) task->getNext();
  }

  if ((rtsys->running != NULL) && (!(rtsys->running->isUserTask()))) {
    hdl = (InterruptHandler*) rtsys->running;
    rtsys->handlerSched[hdl->handlerID - 1] += 0.25;
  }

  i = 0;
  j = 0;
  dn = (DataNode*) rtsys->handlerList->getFirst();
  while (dn != NULL) {
    hdl = (InterruptHandler*) dn->data;
    if (hdl->display) {
      s[j+rtsys->nbrOfSchedTasks] = rtsys->handlerSched[i];
      j++;
    }
    dn = (DataNode*) dn->getNext();
    i++;
  }
  
  /* Monitor graph */
  
  k = 0;
  dn = (DataNode*) rtsys->monitorList->getFirst();
  while (dn != NULL) {
    mon = (Monitor*) dn->data;
    
    for (j=0; j<rtsys->nbrOfTasks; j++)
      rtsys->monitorGraph[j] = (double) (j+1+k*(1+rtsys->nbrOfTasks));
    
    t = (UserTask*) mon->waitingQ->getFirst();
    while (t != NULL) {
      i = t->taskID;
      rtsys->monitorGraph[i-1] += 0.25;
      t = (UserTask*) t->getNext();
    }
    if (mon->heldBy != NULL) {
      i = mon->heldBy->taskID;
      rtsys->monitorGraph[i-1] += 0.5;
    }
    if (mon->display) {
      for (j=0; j<rtsys->nbrOfTasks; j++)
 	m[j+k*rtsys->nbrOfTasks] = rtsys->monitorGraph[j];
      k++;
    }
    dn = (DataNode*) dn->getNext();
  }

  /* Energy consumption */
  energyConsumption[0] = rtsys->energyConsumption;
} 
Ejemplo n.º 20
0
static void mdlOutputs(SimStruct *S, int_T tid)
{
  debugPrintf("'%s': mdlOutputs at %.16f\n", rtsys->blockName, ssGetT(S));

  rtsys = (RTsys*) ssGetUserData(S);

  if (rtsys->init_phase) {
    /* Failure during initialization */
    return;
  }

  real_T *y = ssGetOutputPortRealSignal(S,0);
  real_T *n = ssGetOutputPortRealSignal(S,1);
  real_T *s = ssGetOutputPortRealSignal(S,2);
  real_T *e = ssGetOutputPortRealSignal(S,3);
  int i, shouldRunKernel = 0;
  double timestep; 
 
  DataNode *dn;
  UserTask* t;
  InterruptHandler* hdl;

  if (!rtsys->started && ssGetT(S) == 0.0) {
   rtsys->started = true;
  } else {

    /* Storing the time */

    rtsys->time = ssGetT(S) * rtsys->clockDrift + rtsys->clockOffset;
    
    shouldRunKernel = 0;
  
    /* Run kernel? */
    
    double externTime =  (rtsys->time- rtsys->clockOffset) / rtsys->clockDrift;
    if ((externTime >= rtsys->nextHit) || (shouldRunKernel > 0)) {
      timestep = runKernel(ssGetT(S));
      if (rtsys->error) {
	mexPrintf("In block ==> '%s'\n", ssGetBlockName(S));
	mexPrintf("Simulation aborted!\n");
	ssSetErrorStatus(S, errbuf);
	return;
      } else {
	rtsys->nextHit = (rtsys->time + timestep - rtsys->clockOffset) / rtsys->clockDrift;
      }
    }
  }

  /* Analog outputs */

  for (i=0; i<rtsys->nbrOfOutputs; i++) {
    y[i] = rtsys->outputs[i];
  }
    
  /* Network send outputs */

  for (i=0; i<rtsys->nbrOfNetworks; i++) {
    n[i] = rtsys->nwSnd[i];
    rtsys->oldnwSnd[i] = rtsys->nwSnd[i];
  }
  
  /* Usertask schedule outputs */
  
  i = 0;

  dn = (DataNode*) rtsys->taskList->getFirst();
  while (dn != NULL) {
    t = (UserTask*) dn->data;
    if (t->display) {
      double val = (double) (i+1);
      for (int j = 0; j < rtsys->nbrOfCPUs; j++) {
	s[i + j * rtsys->nbrOfSchedTasks] = val;
      }
      if (t->state == RUNNING) {
	val += 0.5;
      } else if (t->state == READY) {
	val += 0.25;
      } else if (t->state == WAITING) {
	val += 0.125;
      }
      s[i + t->affinity * rtsys->nbrOfSchedTasks] = val;
      i++;
      if (i > rtsys->nbrOfSchedTasks) {
	mexPrintf("FATAL ERROR: schedule output port out of bounds!\n"); 
	ssSetErrorStatus(S, "error");
	return;
      }
     }
    dn = (DataNode*) dn->getNext();
  }
  
  /* Handler schedule outputs */
  
  dn = (DataNode*) rtsys->handlerList->getFirst();
  while (dn != NULL) {
    hdl = (InterruptHandler*) dn->data;
    if (hdl->display) {
      double val = (double) (i+1);
      if (hdl->state == RUNNING) {
	val += 0.5;
      } else if (hdl->state == READY) {
	val += 0.25;
      }
      s[i + hdl->affinity * rtsys->nbrOfSchedTasks] = val;
      i++;
      if (i > rtsys->nbrOfSchedTasks) {
	mexPrintf("FATAL ERROR: schedule output port out of bounds!\n"); 
	ssSetErrorStatus(S, "error");
	return;
      }
     }
    dn = (DataNode*) dn->getNext();
  }
  
  /* Energy consumption output */
  e[0] = rtsys->energyConsumption;

} 
Ejemplo n.º 21
0
void
bluesteinsFFTGpu(const char* const argv[],const unsigned n, 
		 const unsigned orign,const unsigned size)
{
  const unsigned powM = (unsigned) log2(n);
  printf("Compiling Bluesteins Program..\n");

  compileProgram(argv, "fft.h", "kernels/bluesteins.cl");

    printf("Creating Kernel\n");
    for (unsigned i = 0; i < deviceCount; ++i) {
        createKernel(i, "bluesteins");
    }

    const unsigned sizePerGPU = size / deviceCount;
    for (unsigned i = 0; i < deviceCount; ++i) {
        workSize[i] = (i != (deviceCount - 1)) ? sizePerGPU 
                                               : (size - workOffset[i]);       
        
        allocateDeviceMemoryBS(i , workSize[i], workOffset[i]);
        
        clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void*) &d_Hreal[i]);
        clSetKernelArg(kernel[i], 1, sizeof(cl_mem), (void*) &d_Himag[i]);
	clSetKernelArg(kernel[i], 2, sizeof(cl_mem), (void*) &d_Yreal[i]);
        clSetKernelArg(kernel[i], 3, sizeof(cl_mem), (void*) &d_Yimag[i]);
	clSetKernelArg(kernel[i], 4, sizeof(cl_mem), (void*) &d_Zreal[i]);
        clSetKernelArg(kernel[i], 5, sizeof(cl_mem), (void*) &d_Zimag[i]);
	clSetKernelArg(kernel[i], 6, sizeof(unsigned), &n);
	clSetKernelArg(kernel[i], 7, sizeof(unsigned), &orign);
	clSetKernelArg(kernel[i], 8, sizeof(unsigned), &powM);
	clSetKernelArg(kernel[i], 9, sizeof(unsigned), &blockSize);
       

        if ((i + 1) < deviceCount) {
            workOffset[i + 1] = workOffset[i] + workSize[i];
        } 

    }

    size_t localWorkSize[] = {blockSize};
    for (unsigned i = 0; i < deviceCount; ++i) {
        size_t globalWorkSize[] = {shrRoundUp(blockSize, workSize[i])}; 
        // kernel non blocking execution 
        runKernel(i, localWorkSize, globalWorkSize);
    }

    h_Rreal = h_Hreal;
    h_Rimag = h_Himag;
    
    for (unsigned i = 0; i < deviceCount; ++i) {
        copyFromDevice(i, d_Hreal[i], h_Rreal + workOffset[i],
                                                workSize[i]); 
        copyFromDevice(i, d_Himag[i], h_Rimag + workOffset[i],
                                                 workSize[i]);
    }

    // wait for copy event
    const cl_int ciErrNum = clWaitForEvents(deviceCount, gpuDone);
    checkError(ciErrNum, CL_SUCCESS, "clWaitForEvents");
    printGpuTime();
}