Beispiel #1
0
/*
 * Convenience wrapper around the conjugate gradient iteration.
 *
 * Picks the matrix-vector routine matching 'type', overwrites x through
 * random_avector(), sets up the iteration with init_cg() and then calls
 * step_cg() until the 2-norm of the residual drops to 'accuracy' or the
 * step counter reaches 'steps'. The counter starts at 1 and the loop runs
 * while it is smaller than 'steps', so at most steps-1 CG steps are taken.
 *
 * type     : AMATRIX, HMATRIX or H2MATRIX; anything else aborts.
 * A        : matrix object, handed unchanged to the selected routine.
 * b        : right-hand side; its dimension fixes the problem size.
 * x        : solution vector, must have the same dimension as b.
 * accuracy : bound on the residual norm that stops the iteration.
 * steps    : bound on the step counter.
 */
static void
solve_cg_bem2d(matrixtype type, void *A, pavector b, pavector x,
	       real accuracy, uint steps)
{
  addeval_t matvec;
  pavector  residual, direction, aux;
  uint      iter, dim;
  real      resnorm;

  /* Select the matrix-vector product for the given matrix format. */
  if (type == AMATRIX) {
    matvec = (addeval_t) addeval_amatrix_avector;
  }
  else if (type == HMATRIX) {
    matvec = (addeval_t) addeval_hmatrix_avector;
  }
  else if (type == H2MATRIX) {
    matvec = (addeval_t) addeval_h2matrix_avector;
  }
  else {
    printf("ERROR: unknown matrix type!\n");
    abort();
  }

  dim = b->dim;
  assert(x->dim == dim);

  /* Work vectors used by init_cg() and step_cg(). */
  residual  = new_avector(dim);
  direction = new_avector(dim);
  aux       = new_avector(dim);
  random_avector(x);

  init_cg(matvec, A, b, x, residual, direction, aux);

  iter = 1;
  while (iter < steps) {
    step_cg(matvec, A, b, x, residual, direction, aux);
    resnorm = norm2_avector(residual);
#ifndef NDEBUG
    printf("  Residual: %.5e\t Iterations: %u\r", resnorm, iter);
    fflush(stdout);
#endif
    /* Leave before incrementing so the final report shows this step. */
    if (resnorm <= accuracy) {
      break;
    }
    iter++;
  }
#ifndef NDEBUG
  printf("  Residual: %.5e\t Iterations: %u\n", norm2_avector(residual), iter);
#endif

  del_avector(residual);
  del_avector(direction);
  del_avector(aux);

}
Beispiel #2
0
void D3DVideo::init(const video_info_t &info)
{
   if (!g_pD3D)
      init_base(info);
   else if (needs_restore)
   {
      D3DPRESENT_PARAMETERS d3dpp;
      make_d3dpp(info, d3dpp);
      if (dev->Reset(&d3dpp) != D3D_OK)
      {
         HRESULT res = dev->TestCooperativeLevel();
         const char *err;
         switch (res)
         {
            case D3DERR_DEVICELOST:
               err = "DEVICELOST";
               break;

            case D3DERR_DEVICENOTRESET:
               err = "DEVICENOTRESET";
               break;

            case D3DERR_DRIVERINTERNALERROR:
               err = "DRIVERINTERNALERROR";
               break;

            default:
               err = "Unknown";
         }
         // Try to recreate the device completely ...
         RARCH_WARN("[D3D9]: Attempting to recover from dead state (%s).\n", err);
         deinit(); 
         g_pD3D->Release();
         g_pD3D = nullptr;
         init_base(info);
         RARCH_LOG("[D3D9]: Recovered from dead state.\n");
      }
   }

   calculate_rect(screen_width, screen_height, info.force_aspect, g_settings.video.aspect_ratio);

#ifdef HAVE_CG
   if (!init_cg())
      throw std::runtime_error("Failed to init Cg");
#endif
   if (!init_chain(info))
      throw std::runtime_error("Failed to init render chain");
   if (!init_font())
      throw std::runtime_error("Failed to init Font");
}
Beispiel #3
0
void D3DVideo::init(const video_info_t &info)
{
   if (!g_pD3D)
      init_base(info);
   else if (needs_restore)
   {
      D3DPRESENT_PARAMETERS d3dpp;
      make_d3dpp(info, d3dpp);
      if (dev->Reset(&d3dpp) != D3D_OK)
         throw std::runtime_error("Failed to reset device ...");
   }

   calculate_rect(screen_width, screen_height, info.force_aspect, g_settings.video.aspect_ratio);

   if (!init_cg())
      throw std::runtime_error("Failed to init Cg");
   if (!init_chain(info))
      throw std::runtime_error("Failed to init render chain");
   if (!init_font())
      throw std::runtime_error("Failed to init Font");
}
Beispiel #4
0
 ///\brief Creates an object to perform sums of texture data.
 ///
 /// Calls makeStrategy() for the given dimensions, then creates one
 /// framebuffer object and one FP32 rectangle texture per reduction level.
 /// Level i uses the dimensions stored in sizes[i+1], and its texture is
 /// attached to its framebuffer as color attachment 0. Framebuffer 0 is
 /// bound again before returning.
 ///
 ///\param width     stored in this->width and passed to makeStrategy()
 ///\param height    stored in this->height and passed to makeStrategy()
 ///\param numReduce NOTE(review): not used here; makeStrategy() is always
 ///                 called with -1 -- confirm this is intended.
Sum::Sum(GLuint width, GLuint height, int numReduce ) 
{
    makeStrategy(width,height, -1);
    /* 
     * verify dimensions are valid.
     * NOTE(review): any result of isValidDimensions() is ignored here.
     */ 
    isValidDimensions(width, height);
    this->width = width;
    this->height=height;

    /* 
     *  determine how many times to reduce
     */
    N_REDUCE = sizes.size() -1 ;
	
    fb  = (GLuint *)malloc( sizeof( GLuint ) * N_REDUCE );
    tex = (GLuint *)malloc( sizeof( GLuint ) * N_REDUCE );
    /*
     * Both arrays are written by the glGen* calls below, so a failed
     * allocation must be caught here. A zero-sized request may return NULL
     * without being an error, hence the N_REDUCE == 0 escape.
     */
    assert( N_REDUCE == 0 || ( fb != NULL && tex != NULL ) );
    /* todo larger than needed for debug */
    rbbuf = (float *)malloc( sizeof(float) * width * height * 4);
    assert( rbbuf!=NULL);
    
    glGenFramebuffersEXT(N_REDUCE, fb);
    glGenTextures(N_REDUCE, tex);       
    errcheck();
    for( int i=0 ; i<N_REDUCE; i++ ) 
    {
        /* sizes[0] is skipped: level i renders into sizes[i+1]. */
        int lvlW = (int)sizes[i+1].x();
        int lvlH = (int)sizes[i+1].y();
        //bind the framebuffer, fb, so operations will now occur on it
        glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb[i]);

        // initialize texture that will store the framebuffer image (first target)
        glBindTexture(GL_TEXTURE_RECTANGLE_NV, tex[i]);    
        // sums need max precision/ range. Use FP32 texture
        glTexImage2D(GL_TEXTURE_RECTANGLE_NV, 0, GL_FLOAT_RGBA32_NV, 
                lvlW, lvlH, 0, GL_RGBA, GL_FLOAT,NULL);
        glTexParameteri(GL_TEXTURE_RECTANGLE_NV,
                        GL_TEXTURE_MAG_FILTER, GL_NEAREST); 
        glTexParameteri(GL_TEXTURE_RECTANGLE_NV,
                        GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        glTexEnvi( GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE,  GL_REPLACE );

        errcheck();

        // bind this texture to the current framebuffer obj. as 
        // color_attachement_0 
        glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
                GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_NV, tex[i], 0);
        errcheck();

        //see if everything is OK
        CHECK_FRAMEBUFFER_STATUS()
    }
    glBindFramebufferEXT(GL_FRAMEBUFFER_EXT,0);
    init_cg();
    /* Todo: rebind original framebuffer and texture to what it was when we were called */
    glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0 );
}
Beispiel #5
0
// Host stub for the init_cg kernel.
//
// Runs init_cg() once per element of 'set' inside an OpenACC parallel loop.
// arg0, arg2, arg3 and arg4 are accessed through their device pointers
// (data_d) with one double per element; arg1 is a sum reduction whose host
// value (arg1.data) seeds the accumulator and receives the result. Timing
// and transfer statistics are recorded in OP_kernels[2].
void op_par_loop_init_cg(char const *name, op_set set,
  op_arg arg0,
  op_arg arg1,
  op_arg arg2,
  op_arg arg3,
  op_arg arg4){

  const int kernel_id = 2;
  const int nargs = 5;

  // host-side storage of the reduction argument
  double *reduction_host = (double *)arg1.data;

  op_arg args[5] = {arg0, arg1, arg2, arg3, arg4};

  // initialise timers
  double cpu_start, cpu_end, wall_start, wall_end;
  op_timing_realloc(kernel_id);
  op_timers_core(&cpu_start, &wall_start);
  OP_kernels[kernel_id].name      = name;
  OP_kernels[kernel_id].count    += 1;

  if (OP_diags>2) {
    printf(" kernel routine w/o indirection:  init_cg");
  }

  op_mpi_halo_exchanges_cuda(set, nargs, args);

  // local accumulator for the reduction, seeded with the current host value
  double local_sum = reduction_host[0];

  if (set->size >0) {

    //Set up typed device pointers for OpenACC
    double* dev0 = (double*)arg0.data_d;
    double* dev2 = (double*)arg2.data_d;
    double* dev3 = (double*)arg3.data_d;
    double* dev4 = (double*)arg4.data_d;
    #pragma acc parallel loop independent deviceptr(dev0,dev2,dev3,dev4) reduction(+:local_sum)
    for ( int elem=0; elem<set->size; elem++ ){
      init_cg(
        dev0 + elem,
        &local_sum,
        dev2 + elem,
        dev3 + elem,
        dev4 + elem);
    }
  }

  // combine reduction data
  reduction_host[0] = local_sum;
  op_mpi_reduce_double(&arg1,reduction_host);
  op_mpi_set_dirtybit_cuda(nargs, args);

  // update kernel record
  op_timers_core(&cpu_end, &wall_end);
  OP_kernels[kernel_id].time     += wall_end - wall_start;
  // arg0 is counted once, the others twice (presumably read + write -- confirm)
  OP_kernels[kernel_id].transfer += (float)set->size * arg0.size;
  OP_kernels[kernel_id].transfer += (float)set->size * arg2.size * 2.0f;
  OP_kernels[kernel_id].transfer += (float)set->size * arg3.size * 2.0f;
  OP_kernels[kernel_id].transfer += (float)set->size * arg4.size * 2.0f;
}
Beispiel #6
0
/*
 * Returns the value that follows option argv[i] on the command line.
 * Prints an error and exits if the option is the last argument, so the
 * caller never reads past the end of argv.
 */
static const char *required_option_value(int argc,char *argv[],int i)
{
  if (i+1>=argc)
    {
      fprintf(stderr,"missing value for argument: %s\n",argv[i]);
      exit(1);
    }
  return argv[i+1];
}

/*
 * Training driver.
 *
 * Command line: -seed N, -start N, -epsilon X, -load FILE (each option
 * needs a value; unknown options are an error).
 *
 * Steps: initialise the parallel layer, seed the generator, build the
 * model, load the four example sets and set each numExamples to 500.
 * With DO_ONLINE_PRE_TRAIN defined and start==1, 10000 online updates are
 * run first and the weights are saved. Then, for i = 1..4, the gradients
 * of all four nets are gathered with parallel_g(), a search direction is
 * set up with init_cg() (first pass) or cg() (later passes), and a line
 * search of up to 9 steps grows the step size by 1.7x until the error
 * returned by sample() rises.
 */
int main(int argc,char *argv[])
{
  int hcount=0,scount=0;
  float herr=0,serr=0,dice;
  Example *ex;
  int do_cg=0;
  float epsilon;
  int seed=1,first=1;
  float e,e0,lr,lrPrev;
  int start=1,i,j;
  char  loadFile[255],fn[255];
  const char *value;
  setbuf(stdout,NULL);

  parallel_init(&argc,&argv);

  /* NOTE(review): myid is read here but only assigned from
     parallel_proc_id() further down -- confirm parallel_init() sets it. */
  if (myid==0)
    {
      announce_version();
      system("hostname");
    }
  printf("pid %d\n",getpid());


  loadFile[0]=0;

  /* what are the command line arguments? */
  for(i=1;i<argc;i++)
    {
      if (strcmp(argv[i],"-seed")==0)
        {
          value=required_option_value(argc,argv,i);
          seed=atoi(value);
          i++;
        }
      else if (strcmp(argv[i],"-start")==0)
        {
          value=required_option_value(argc,argv,i);
          start=atoi(value);
          i++;
        }
      else if (strncmp(argv[i],"-epsilon",5)==0)
        {
          /* NOTE(review): epsilon is parsed but never used; default_epsilon
             is set to 0.05 unconditionally below -- confirm intent. */
          value=required_option_value(argc,argv,i);
          epsilon=atof(value);
          i++;
        }
      else if (strncmp(argv[i],"-load",5)==0)
        {
          value=required_option_value(argc,argv,i);
          /* refuse names that would overflow the fixed-size buffer */
          if (strlen(value)>=sizeof(loadFile))
            {
              fprintf(stderr,"file name too long: %s\n",value);
              exit(1);
            }
          strcpy(loadFile,value);
          i++;
        }
      else
        {
          fprintf(stderr,"unknown argument: %s\n",argv[i]);
          exit(1);
        }
    }

  default_epsilon=0.05;


  mikenet_set_seed(seed);

  build_hearing_model(SAMPLES);

  /* load in our example set */
  phono_examples=load_examples("phono.pat",TICKS);
  sem_examples=load_examples("sem.pat",TICKS);
  hearing_examples=load_examples("ps.pat",TICKS);
  speaking_examples=load_examples("sp.pat",TICKS);

  phono_examples->numExamples=500;
  sem_examples->numExamples=500;
  hearing_examples->numExamples=500;
  speaking_examples->numExamples=500;


  myid=parallel_proc_id();


#ifdef DO_ONLINE_PRE_TRAIN
  if (start==1)
    {
      for(i=1;i<=10000;i++)
        {
          /* pick one of the four nets at random: 20/30/20/30 percent */
          dice = mikenet_random();
          if (dice <=0.2)
            {
              ex=get_random_example(phono_examples);
              crbp_forward(phono,ex);
              crbp_compute_gradients(phono,ex);
              crbp_apply_deltas(phono);
            }
          else if (dice <= 0.5)
            {
              ex=get_random_example(hearing_examples);
              crbp_forward(ps,ex);
              crbp_compute_gradients(ps,ex);
              herr += compute_error(ps,ex);
              /* count the sample so the report below is a real average */
              hcount++;
              crbp_apply_deltas(ps);
            }
          else if (dice <= 0.7)
            {
              ex=get_random_example(sem_examples);
              crbp_forward(sem,ex);
              crbp_compute_gradients(sem,ex);
              crbp_apply_deltas(sem);
            }
          else
            {
              ex=get_random_example(speaking_examples);
              crbp_forward(sp,ex);
              crbp_compute_gradients(sp,ex);
              serr+=compute_error(sp,ex);
              /* count the sample so the report below is a real average */
              scount++;
              crbp_apply_deltas(sp);
            }

          /* report and reset the running averages every 100 updates */
          if (i % 100 == 0)
            {
              printf("%d hear %f speak %f\n",i,
                     herr/hcount,serr/scount);
              herr=0.0;
              serr=0.0;
              hcount=0;
              scount=0;
            }
        }
      sprintf(fn,"s%d_online_weights",seed);
      save_weights(hearing,fn);
    }
#endif

  parallel_broadcast_weights(hearing);


  do_cg=1;
  if (do_cg && myid==0)
    printf("USING CG\n");

  /* loop for ITER number of times */
  for(i=1;i<5;i++)
    {
      parallel_sync();
      store_all_weights(hearing);
      e=parallel_g(ps,hearing_examples,0.8) +
        parallel_g(sp,speaking_examples,0.8) +
        parallel_g(sem,sem_examples,0.2) +
        parallel_g(phono,phono_examples,0.2);
      
      if(do_cg)
        {
          if (first)
            init_cg(hearing);
          else
            cg(hearing);
          first=0;
        }

      e0=e;
      if (myid==0)
        printf("%d e0: %f\n",i,e);
      lr = 0.2/hearing_examples->numExamples;
      lrPrev=lr/10;
      /* line search: grow the step while the sampled error keeps falling */
      for(j=1;j<10;j++)
        {
          e=sample(lr);
          if (myid==0)
            printf("\t\t%d %f %f\n",j,e,lr);
          if (e>e0)
            {
              /* error went up: fall back to the previous step size */
              if (myid==0)
                test_step(hearing,lrPrev);
              parallel_broadcast_weights(hearing);
              break;
            }
          e0=e;
          lrPrev=lr;
          lr *= 1.7;
        }
      zero_gradients(hearing);
      if (i % 5==0 && myid==0)
        {
          sprintf(fn,"s%d_%d_weights",seed,i);
          save_weights(hearing,fn);
        }
    }
  parallel_finish();
  return 0;
}
Beispiel #7
0
int main(int argc, char** argv)
{

	int dw = GetScreenWidth();
	int dh = GetScreenHeight();

	vec3 icp = {0.0F, 4.0F, 8.0F};
	vec3 icl = {0.0F, 0.0F, 0.0F};
	vec3 icu = {0.0F, 1.0F, 0.0F};
	g_globalCam.SetCamera(icp, icl, icu);
	g_globalCam.SetPerspective(65.0F, dw / (float)dh, 0.1F, 1000.0F);



	// Query extension base //
	CreateAppWindow("Mooball", dw, dh, 32, false);
	initExtensions();
	InitKeys();

	srand(time(NULL));

	// Query Device capabilities //
	if(!ValidateDevice())
	{
		DestroyAppWindow();
		return 0;
	}
	g_pTexInterface = new CTextureInterface;
	g_pShaderInterface = new CShaderInterface;
	g_lightManager = new CLightManager;
	g_bMSAA = (quickINI::Instance()->getValueAsInt("msaa") > 0) && (g_pTexInterface->GetMaxMSAA() > 0);
	g_bVSYNC = quickINI::Instance()->getValueAsInt("vsync") > 0;

//	g_model.Load( "Media/Models/Pokeball.3ds" );
	CModelObject* pMdl = new COBJModel;
	std::string err = pMdl->LoadModel("sponza.obj", "Media/Models/");

	// Initialize CG Runtime and shaders //
	init_cg();

	// Turn vsync on //
	if( EXT_VSYNC && g_bVSYNC )
	{
        #ifdef WIN32
            wglSwapIntervalEXT(1);
        #else
            glXSwapIntervalSGI(1);
        #endif
	}

	// Create offscreen targets and depth configurations //
	init_render_targets();

	// Added 8/4/10 - Keeps Mooball from hogging
	// the input focus while it's minimized.
//	HWND windowHandle = GetFocus();

	while(running)
	{
		if(QueryQuitMsg())
			running = false;

		else
		{
			if(!g_bDebugMode)
			{
				UpdateScene();
				PollKeys();
				PollMouse();
			}
			RenderScene();
			FlipBuffers();
		}

	}

	// Fall through to destruction //
	DestroyAppWindow();

	delete g_lightManager;
	delete g_pTexInterface;
	delete g_pShaderInterface;

	return 0;
}