/*
 * Convenience wrapper around the conjugate gradient routines init_cg()
 * and step_cg().
 *
 * type     - selects which addeval routine is used to apply A
 *            (AMATRIX, HMATRIX or H2MATRIX); any other value prints an
 *            error message and aborts.
 * A        - the matrix object, passed through untyped to the selected
 *            addeval routine.
 * b        - right-hand side; its dimension determines the size of the
 *            work vectors.
 * x        - solution vector; must have the same dimension as b. It is
 *            handed to random_avector() before the iteration starts.
 * accuracy - the iteration stops as soon as the 2-norm of the residual
 *            vector is less than or equal to this value.
 * steps    - iteration bound; step_cg() is called at most steps-1 times.
 */
static void
solve_cg_bem2d(matrixtype type, void *A, pavector b, pavector x,
	       real accuracy, uint steps)
{
  addeval_t matvec;
  pavector  res, dir, aux;
  uint      iter, dim;
  real      res_norm;

  /* Choose the matrix-vector product that matches the matrix format. */
  if (type == AMATRIX) {
    matvec = (addeval_t) addeval_amatrix_avector;
  }
  else if (type == HMATRIX) {
    matvec = (addeval_t) addeval_hmatrix_avector;
  }
  else if (type == H2MATRIX) {
    matvec = (addeval_t) addeval_h2matrix_avector;
  }
  else {
    printf("ERROR: unknown matrix type!\n");
    abort();
  }

  dim = b->dim;
  assert(x->dim == dim);

  /* Work vectors required by init_cg() / step_cg(). */
  res = new_avector(dim);
  dir = new_avector(dim);
  aux = new_avector(dim);

  random_avector(x);

  init_cg(matvec, A, b, x, res, dir, aux);

  /* The counter is only advanced when the iteration continues, so after
   * the loop it holds the index of the step that met the tolerance, or
   * the value at which the bound was reached. */
  iter = 1;
  while (iter < steps) {
    step_cg(matvec, A, b, x, res, dir, aux);
    res_norm = norm2_avector(res);
#ifndef NDEBUG
    printf("  Residual: %.5e\t Iterations: %u\r", res_norm, iter);
    fflush(stdout);
#endif
    if (res_norm <= accuracy) {
      break;
    }
    iter++;
  }
#ifndef NDEBUG
  printf("  Residual: %.5e\t Iterations: %u\n", norm2_avector(res), iter);
#endif

  del_avector(res);
  del_avector(dir);
  del_avector(aux);
}
void D3DVideo::init(const video_info_t &info) { if (!g_pD3D) init_base(info); else if (needs_restore) { D3DPRESENT_PARAMETERS d3dpp; make_d3dpp(info, d3dpp); if (dev->Reset(&d3dpp) != D3D_OK) { HRESULT res = dev->TestCooperativeLevel(); const char *err; switch (res) { case D3DERR_DEVICELOST: err = "DEVICELOST"; break; case D3DERR_DEVICENOTRESET: err = "DEVICENOTRESET"; break; case D3DERR_DRIVERINTERNALERROR: err = "DRIVERINTERNALERROR"; break; default: err = "Unknown"; } // Try to recreate the device completely ... RARCH_WARN("[D3D9]: Attempting to recover from dead state (%s).\n", err); deinit(); g_pD3D->Release(); g_pD3D = nullptr; init_base(info); RARCH_LOG("[D3D9]: Recovered from dead state.\n"); } } calculate_rect(screen_width, screen_height, info.force_aspect, g_settings.video.aspect_ratio); #ifdef HAVE_CG if (!init_cg()) throw std::runtime_error("Failed to init Cg"); #endif if (!init_chain(info)) throw std::runtime_error("Failed to init render chain"); if (!init_font()) throw std::runtime_error("Failed to init Font"); }
void D3DVideo::init(const video_info_t &info) { if (!g_pD3D) init_base(info); else if (needs_restore) { D3DPRESENT_PARAMETERS d3dpp; make_d3dpp(info, d3dpp); if (dev->Reset(&d3dpp) != D3D_OK) throw std::runtime_error("Failed to reset device ..."); } calculate_rect(screen_width, screen_height, info.force_aspect, g_settings.video.aspect_ratio); if (!init_cg()) throw std::runtime_error("Failed to init Cg"); if (!init_chain(info)) throw std::runtime_error("Failed to init render chain"); if (!init_font()) throw std::runtime_error("Failed to init Font"); }
///\brief Creates an object to perform sums of texture data. Sum::Sum(GLuint width, GLuint height, int numReduce ) { makeStrategy(width,height, -1); /* * verify dimensions are valid. */ isValidDimensions(width, height); this->width = width; this->height=height; /* * determine how many times to reduce */ //N_REDUCE = numLevels( (int)width, (int)height); N_REDUCE = sizes.size() -1 ; fb = (GLuint *)malloc( sizeof( GLuint ) * N_REDUCE ); tex = (GLuint *)malloc( sizeof( GLuint ) * N_REDUCE ); /* todo larger than needed for debug */ rbbuf = (float *)malloc( sizeof(float) * width * height * 4); assert( rbbuf!=NULL); glGenFramebuffersEXT(N_REDUCE, fb); glGenTextures(N_REDUCE, tex); errcheck(); for( int i=0 ; i<N_REDUCE; i++ ) { //int lvlW = width/(int)powf(2.0,i+1); //int lvlH = height/(int)powf(2.0,i+1); int lvlW = (int)sizes[i+1].x(); int lvlH = (int)sizes[i+1].y(); /* cerr<<i<<" : "<<lvlW<<"x"<<lvlH<<endl; */ //bind the framebuffer, fb, so operations will now occur on it glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb[i]); // initialize texture that will store the framebuffer image (first target) //glBindTexture(GL_TEXTURE_RECTANGLE_NV, tex[0]); glBindTexture(GL_TEXTURE_RECTANGLE_NV, tex[i]); // sums need max precision/ range. Use FP32 texture glTexImage2D(GL_TEXTURE_RECTANGLE_NV, 0, GL_FLOAT_RGBA32_NV, lvlW, lvlH, 0, GL_RGBA, GL_FLOAT,NULL); glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexEnvi( GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE ); //fprintf(stderr,"Generate Mip map ok\n"); errcheck(); // bind this texture to the current framebuffer obj. 
as // color_attachement_0 glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_NV, tex[i], 0); errcheck(); //see if everything is OK CHECK_FRAMEBUFFER_STATUS() } /* cerr<<"SUM CREATED"<<endl; */ glBindFramebufferEXT(GL_FRAMEBUFFER_EXT,0); init_cg(); /* Todo: rebind original framebuffer and texture to what it was when we were called */ glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0 ); }
// host stub function void op_par_loop_init_cg(char const *name, op_set set, op_arg arg0, op_arg arg1, op_arg arg2, op_arg arg3, op_arg arg4){ double*arg1h = (double *)arg1.data; int nargs = 5; op_arg args[5]; args[0] = arg0; args[1] = arg1; args[2] = arg2; args[3] = arg3; args[4] = arg4; // initialise timers double cpu_t1, cpu_t2, wall_t1, wall_t2; op_timing_realloc(2); op_timers_core(&cpu_t1, &wall_t1); OP_kernels[2].name = name; OP_kernels[2].count += 1; if (OP_diags>2) { printf(" kernel routine w/o indirection: init_cg"); } op_mpi_halo_exchanges_cuda(set, nargs, args); double arg1_l = arg1h[0]; if (set->size >0) { //Set up typed device pointers for OpenACC double* data0 = (double*)arg0.data_d; double* data2 = (double*)arg2.data_d; double* data3 = (double*)arg3.data_d; double* data4 = (double*)arg4.data_d; #pragma acc parallel loop independent deviceptr(data0,data2,data3,data4) reduction(+:arg1_l) for ( int n=0; n<set->size; n++ ){ init_cg( &data0[1*n], &arg1_l, &data2[1*n], &data3[1*n], &data4[1*n]); } } // combine reduction data arg1h[0] = arg1_l; op_mpi_reduce_double(&arg1,arg1h); op_mpi_set_dirtybit_cuda(nargs, args); // update kernel record op_timers_core(&cpu_t2, &wall_t2); OP_kernels[2].time += wall_t2 - wall_t1; OP_kernels[2].transfer += (float)set->size * arg0.size; OP_kernels[2].transfer += (float)set->size * arg2.size * 2.0f; OP_kernels[2].transfer += (float)set->size * arg3.size * 2.0f; OP_kernels[2].transfer += (float)set->size * arg4.size * 2.0f; }
int main(int argc,char *argv[]) { char cmd[255]; int hcount=0,scount=0; float herr=0,serr=0,dice; Example *ex; int do_cg=0; float epsilon; int seed=1,first=1; float e,e0,lr,lrPrev; int start=1,i,j; char loadFile[255],fn[255]; setbuf(stdout,NULL); parallel_init(&argc,&argv); if (myid==0) { announce_version(); system("hostname"); } printf("pid %d\n",getpid()); loadFile[0]=0; /* what are the command line arguments? */ for(i=1;i<argc;i++) { if (strcmp(argv[i],"-seed")==0) { seed=atoi(argv[i+1]); i++; } else if (strcmp(argv[i],"-start")==0) { start=atoi(argv[i+1]); i++; } else if (strncmp(argv[i],"-epsilon",5)==0) { epsilon=atof(argv[i+1]); i++; } else if (strncmp(argv[i],"-load",5)==0) { strcpy(loadFile,argv[i+1]); i++; } else { fprintf(stderr,"unknown argument: %s\n",argv[i]); exit(1); } } default_epsilon=0.05; mikenet_set_seed(seed); build_hearing_model(SAMPLES); /* load in our example set */ phono_examples=load_examples("phono.pat",TICKS); sem_examples=load_examples("sem.pat",TICKS); hearing_examples=load_examples("ps.pat",TICKS); speaking_examples=load_examples("sp.pat",TICKS); phono_examples->numExamples=500; sem_examples->numExamples=500; hearing_examples->numExamples=500; speaking_examples->numExamples=500; myid=parallel_proc_id(); #ifdef DO_ONLINE_PRE_TRAIN if (start==1) { for(i=1;i<=10000;i++) { dice = mikenet_random(); if (dice <=0.2) { ex=get_random_example(phono_examples); crbp_forward(phono,ex); crbp_compute_gradients(phono,ex); crbp_apply_deltas(phono); } else if (dice <= 0.5) { ex=get_random_example(hearing_examples); crbp_forward(ps,ex); crbp_compute_gradients(ps,ex); herr += compute_error(ps,ex); crbp_apply_deltas(ps); } else if (dice <= 0.7) { ex=get_random_example(sem_examples); crbp_forward(sem,ex); crbp_compute_gradients(sem,ex); crbp_apply_deltas(sem); } else { ex=get_random_example(speaking_examples); crbp_forward(sp,ex); crbp_compute_gradients(sp,ex); serr+=compute_error(sp,ex); crbp_apply_deltas(sp); } if (i % 100 == 0) { printf("%d hear %f 
speak %f\n",i, herr/hcount,serr/scount); herr=0.0; serr=0.0; hcount=0; scount=0; } } sprintf(fn,"s%d_online_weights",seed); save_weights(hearing,fn); } #endif parallel_broadcast_weights(hearing); do_cg=1; if (do_cg && myid==0) printf("USING CG\n"); /* loop for ITER number of times */ for(i=1;i<5;i++) { parallel_sync(); store_all_weights(hearing); e=parallel_g(ps,hearing_examples,0.8) + parallel_g(sp,speaking_examples,0.8) + parallel_g(sem,sem_examples,0.2) + parallel_g(phono,phono_examples,0.2); if(do_cg) { if (first) init_cg(hearing); else cg(hearing); first=0; } e0=e; if (myid==0) printf("%d e0: %f\n",i,e); lr = 0.2/hearing_examples->numExamples; lrPrev=lr/10; for(j=1;j<10;j++) { e=sample(lr); if (myid==0) printf("\t\t%d %f %f\n",j,e,lr); if (e>e0) { if (myid==0) test_step(hearing,lrPrev); parallel_broadcast_weights(hearing); break; } e0=e; lrPrev=lr; lr *= 1.7; } zero_gradients(hearing); if (i % 5==0 && myid==0) { sprintf(fn,"s%d_%d_weights",seed,i); save_weights(hearing,fn); } } parallel_finish(); return 0; }
int main(int argc, char** argv) { int dw = GetScreenWidth(); int dh = GetScreenHeight(); vec3 icp = {0.0F, 4.0F, 8.0F}; vec3 icl = {0.0F, 0.0F, 0.0F}; vec3 icu = {0.0F, 1.0F, 0.0F}; g_globalCam.SetCamera(icp, icl, icu); g_globalCam.SetPerspective(65.0F, dw / (float)dh, 0.1F, 1000.0F); // Query extension base // CreateAppWindow("Mooball", dw, dh, 32, false); initExtensions(); InitKeys(); srand(time(NULL)); // Query Device capabilities // if(!ValidateDevice()) { DestroyAppWindow(); return 0; } g_pTexInterface = new CTextureInterface; g_pShaderInterface = new CShaderInterface; g_lightManager = new CLightManager; g_bMSAA = (quickINI::Instance()->getValueAsInt("msaa") > 0) && (g_pTexInterface->GetMaxMSAA() > 0); g_bVSYNC = quickINI::Instance()->getValueAsInt("vsync") > 0; // g_model.Load( "Media/Models/Pokeball.3ds" ); CModelObject* pMdl = new COBJModel; std::string err = pMdl->LoadModel("sponza.obj", "Media/Models/"); // Initialize CG Runtime and shaders // init_cg(); // Turn vsync on // if( EXT_VSYNC && g_bVSYNC ) { #ifdef WIN32 wglSwapIntervalEXT(1); #else glXSwapIntervalSGI(1); #endif } // Create offscreen targets and depth configurations // init_render_targets(); // Added 8/4/10 - Keeps Mooball from hogging // the input focus while it's minimized. // HWND windowHandle = GetFocus(); while(running) { if(QueryQuitMsg()) running = false; else { if(!g_bDebugMode) { UpdateScene(); PollKeys(); PollMouse(); } RenderScene(); FlipBuffers(); } } // Fall through to destruction // DestroyAppWindow(); delete g_lightManager; delete g_pTexInterface; delete g_pShaderInterface; return 0; }