/* Tutorial entry point: configures denormal handling, parses the command line,
   initializes Embree, then renders to disk and/or opens the interactive window. */
int main(int argc, char **argv)
  {
    /* DAZ and FTZ in the MXCSR control/status register, set for best performance */
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);

    std::cout << " === Possible cmd line options: -pregenerate, -cache === " << std::endl;

    /* default camera, put in place before the command line is parsed */
    g_camera.to   = Vec3fa(0.0f,0.0f,0.0f);
    g_camera.from = Vec3fa(1.5f,1.5f,-1.5f);

    /*! Parse command line options. */
    parseCommandLine(new ParseStream(new CommandLineStream(argc, argv)), FileName());

    /*! Extend the Embree configuration string: optional thread count, then subdiv mode. */
    if (g_numThreads) {
      g_rtcore += ",threads=" + std::to_string((long long)g_numThreads);
    }
    g_rtcore += g_subdiv_mode;

    /*! Initialize Embree state. */
    init(g_rtcore.c_str());

    /* render to disk only when an output file name was given */
    const bool writeImage = outFilename.str() != "";
    if (writeImage) {
      renderToFile(outFilename);
    }

    /* nothing more to do unless interactive mode is on */
    if (!g_interactive)
      return 0;

    initWindowState(argc,argv,tutorialName, g_width, g_height, g_fullscreen);
    enterWindowRunLoop();
    return 0;
  }
示例#2
0
文件: Mixer.cpp 项目: Skyh13/lmms
void Mixer::fifoWriter::run()
{
// set denormal protection for this thread
#ifdef __SSE3__
/* DAZ flag */
	_MM_SET_DENORMALS_ZERO_MODE( _MM_DENORMALS_ZERO_ON );
#endif
#ifdef __SSE__
/* FTZ flag */
	_MM_SET_FLUSH_ZERO_MODE( _MM_FLUSH_ZERO_ON );
#endif

#if 0
#ifdef LMMS_BUILD_LINUX
#ifdef LMMS_HAVE_SCHED_H
	cpu_set_t mask;
	CPU_ZERO( &mask );
	CPU_SET( 0, &mask );
	sched_setaffinity( 0, sizeof( mask ), &mask );
#endif
#endif
#endif

	const fpp_t frames = m_mixer->framesPerPeriod();
	while( m_writing )
	{
		surroundSampleFrame * buffer = new surroundSampleFrame[frames];
		const surroundSampleFrame * b = m_mixer->renderNextBuffer();
		memcpy( buffer, b, frames * sizeof( surroundSampleFrame ) );
		m_fifo->write( buffer );
	}

	m_fifo->write( NULL );
}
示例#3
0
  static uintptr_t recordThread_(void *recordData_)
  {
    recordData_t *recordData = (recordData_t *)recordData_;
    int retval = 0;
    const int bufsize = 4096;
    MYFLT buf[bufsize];
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
    while (recordData->running) {
        pthread_mutex_lock(&recordData->mutex);
        pthread_cond_wait(&recordData->condvar, &recordData->mutex);
        int sampsread;
        do {
            sampsread = csoundReadCircularBuffer(NULL, recordData->cbuf, buf, bufsize);
#ifdef USE_DOUBLE
            sf_write_double((SNDFILE *) recordData->sfile,
                            buf, sampsread);
#else
            sf_write_float((SNDFILE *) recordData->sfile,
                           buf, sampsread);
#endif
        } while(sampsread != 0);
        pthread_mutex_unlock(&recordData->mutex);
    }
    return (uintptr_t) ((unsigned int) retval);
  }
示例#4
0
  /* main function in embree namespace: parses options, initializes the ray
     tracing core, then either renders one image to disk or runs the GLUT loop */
  int main(int argc, char** argv)
  {
    /* DAZ and FTZ in the MXCSR control/status register, set for best performance */
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);

    /* default camera, put in place before the command line is parsed */
    g_camera.to   = Vec3fa(0.0f,0.0f,0.0f);
    g_camera.from = Vec3fa(2.5f,2.5f,2.5f);

    /* parse the command line through a stream */
    Ref<ParseStream> stream = new ParseStream(new CommandLineStream(argc, argv));
    parseCommandLine(stream, FileName());

    /* forward an explicit thread count to the ray tracing core */
    if (g_numThreads) {
      g_rtcore += ",threads=" + toString(g_numThreads);
    }

    /* initialize ray tracing core */
    init(g_rtcore.c_str());

    const bool renderOffline = outFilename.str() != "";
    if (renderOffline) {
      /* render to disk; no window is opened in this case */
      renderToFile(outFilename);
    }
    else {
      /* initialize GLUT and enter its run loop */
      initWindowState(argc,argv,tutorialName, g_width, g_height, g_fullscreen);
      enterWindowRunLoop();
    }

    return 0;
  }
// Background thread body: whenever parameters have changed, snapshots ct_write
// into ct_calc and recomputes the b/c coefficients of every filter into
// dt_calc, then raises sampleSwapDT.
void TimeLagFilterCore::DTCalcThread::run(){
    //Treat denormalized floats as zero on this thread
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);

    //Outputs of filtercalculations(); kept at function scope as before
    float coefB, coefC;
    for(;;){
        if(threadShouldExit()) break;
        //Sleep until notified unless a change is already pending
        if(!core.paramsChanged) wait(-1);
        if(threadShouldExit()) return;

        if(core.sampleSwapDT){
            //Previous result has not been taken yet: retry shortly
            wait(1);
            continue;
        }

        //Snapshot the written parameters under the ct lock
        {
            const ScopedWriteLock ctGuard(core.ctLock);
            memcpy(core.ct_calc, core.ct_write, core.num_filters * sizeof (CTParams));
            core.paramsChanged = false;
        }

        //Recompute the coefficients under the dt lock
        const ScopedWriteLock dtGuard(core.dtLock);
        for(int idx = 0; idx < core.num_filters; ++idx){
            filtercalculations(core.reduced_fs, core.ct_calc[idx].center,
                               core.ct_calc[idx].bw, &coefB, &coefC);
            core.dt_calc[idx].b = coefB;
            core.dt_calc[idx].c = coefC;
        }
        core.sampleSwapDT = true;
    }
}
示例#6
0
  /* main function in embree namespace */
  int main(int argc, char** argv) 
  {
    /* for best performance set FTZ and DAZ flags in MXCSR control and status register */
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);

    /* initialize ray tracing core and force bvh4.triangle4v hierarchy for triangles */
    rtcInit("tri_accel=bvh4.triangle4v");
    
    /* set error handler */
    rtcSetErrorFunction(error_handler);
    
    /* create scene */
    g_scene = rtcNewScene(RTC_SCENE_STATIC,RTC_INTERSECT1);
    addCube(g_scene,Vec3fa(-1,0,0));
    addCube(g_scene,Vec3fa(1,0,0));
    addCube(g_scene,Vec3fa(0,0,-1));
    addCube(g_scene,Vec3fa(0,0,1));
    addHair(g_scene);
    addGroundPlane(g_scene);
    rtcCommit (g_scene);

    /* print triangle BVH */
    print_bvh(g_scene);

    /* cleanup */
    rtcDeleteScene (g_scene);
    rtcExit();
    return 0;
  }
示例#7
0
/* main function in embree namespace: parses options, loads the scene,
   initializes the ray tracing core and runs the requested render modes */
int main(int argc, char** argv)
{
    /* DAZ and FTZ in the MXCSR control/status register, set for best performance */
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);

    /* parse the command line through a stream */
    Ref<ParseStream> stream = new ParseStream(new CommandLineStream(argc, argv));
    parseCommandLine(stream, FileName());

    /* no scene given: parse the default cornell box command file instead */
    if (filename.ext() == "") {
        FileName file = FileName::executableFolder() + FileName("models/cornell_box.ecs");
        parseCommandLine(new ParseStream(new LineCommentFilter(file, "#")), file.path());
    }

    /* build the configuration string: threads, benchmark flag, subdiv mode */
    if (g_numThreads) {
        g_rtcore += ",threads=" + std::to_string((long long)g_numThreads);
    }
    if (g_numBenchmarkFrames) {
        g_rtcore += ",benchmark=1";
    }
    g_rtcore += g_subdiv_mode;

    /* load scene, dispatching on the lower-cased file extension */
    const std::string sceneExt = strlwr(filename.ext());
    if (sceneExt == std::string("obj")) {
        g_scene->add(loadOBJ(filename,g_subdiv_mode != ""));
    }
    else if (sceneExt == std::string("xml")) {
        g_scene->add(loadXML(filename,one));
    }
    else if (sceneExt != "") {
        THROW_RUNTIME_ERROR("invalid scene type: "+sceneExt);
    }

    /* initialize ray tracing core */
    init(g_rtcore.c_str());

    /* send model: move the loaded graph into g_obj_scene and drop the reference */
    g_obj_scene.add(g_scene.dynamicCast<SceneGraph::Node>(),g_instancing_mode);
    g_scene = nullptr;
    set_scene(&g_obj_scene);

    /* benchmark mode */
    if (g_numBenchmarkFrames) {
        renderBenchmark(outFilename);
    }

    /* render to disk */
    const bool writeImage = outFilename.str() != "";
    if (writeImage) {
        renderToFile(outFilename);
    }

    /* interactive mode */
    if (!g_interactive)
        return 0;

    initWindowState(argc,argv,tutorialName, g_width, g_height, g_fullscreen);
    enterWindowRunLoop(g_anim_mode);
    return 0;
}
示例#8
0
 static uintptr_t csoundPerformanceThread_(void *userData)
 {
   CsPerfThread_PerformScore p(userData);
   _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
   // perform the score
   int retval = p.Perform();
   // return positive value if stopped or end of score, and negative on error
   return (uintptr_t) ((unsigned int) retval);
 }
/* Program entry: sets the denormal-handling flags, then hands over to ispcEntry(). */
int main(int argc, char* argv[])
{
  /* DAZ and FTZ in the MXCSR control/status register, set for best performance */
  _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
  _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);

  ispcEntry();

  return 0;
}
示例#10
0
// Switches the FTZ and DAZ flags on or off together.
// Returns true when the flags were written, false when the build has no
// USE_SSE3 support (in which case nothing is changed).
bool Context::setFlushDenormal(bool on) {
#ifndef USE_SSE3
  return false;
#else
  // Pick both modes up front, then apply them in the same order as before
  const unsigned int flushZeroMode     = on ? _MM_FLUSH_ZERO_ON
                                            : _MM_FLUSH_ZERO_OFF;
  const unsigned int denormalsZeroMode = on ? _MM_DENORMALS_ZERO_ON
                                            : _MM_DENORMALS_ZERO_OFF;

  // flush-to-zero (FTZ) flag
  _MM_SET_FLUSH_ZERO_MODE(flushZeroMode);
  // denormals-are-zero (DAZ) flag
  _MM_SET_DENORMALS_ZERO_MODE(denormalsZeroMode);
  return true;
#endif
}
示例#11
0
int main(int argc, char* argv[])
{
  /* for best performance set FTZ and DAZ flags in MXCSR control and status register */
  _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
  _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);

  /* create new Embree device */
  RTCDevice device = rtcNewDevice("verbose=1");

  /* ddelete device again */
  rtcDeleteDevice(device);
  
  return 0;
}
示例#12
0
文件: Util.cpp 项目: absorbguo/Paddle
// Process-wide start-up: parses gflags, initializes logging, sets the
// denormal-handling flags, seeds the random generators, optionally starts the
// GPU environment and finally runs the registered init functions.
void initMain(int argc, char** argv) {
  installLayerStackTracer();

  // Capture the full command line for logging before the flags are parsed
  std::string line;
  int argIdx = 0;
  while (argIdx < argc) {
    line += argv[argIdx];
    line += ' ';
    ++argIdx;
  }

#ifndef GFLAGS_GFLAGS_H_
  namespace gflags = google;
#endif

  gflags::ParseCommandLineFlags(&argc, &argv, true);
  initializeLogging(argc, argv);
  LOG(INFO) << "commandline: " << line;
  // After flag parsing only the program name may remain
  CHECK_EQ(argc, 1) << "Unknown commandline argument: " << argv[1];

  installProfilerSwitch();

#ifdef __SSE3__
  _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
#endif
#ifdef __SSE__
  _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
#endif

  if (FLAGS_seed != 0) {
    // Explicit seed from the flag
    srand(FLAGS_seed);
    ThreadLocalRand::initSeed(FLAGS_seed);
  } else {
    // Seed 0: derive the seed from the current time and log it
    unsigned int timeSeed = time(NULL);
    srand(timeSeed);
    ThreadLocalRand::initSeed(timeSeed);
    LOG(INFO) << "random number seed=" << timeSeed;
  }

  if (FLAGS_use_gpu) {
    // This is the initialization of the CUDA environment,
    // need before runInitFunctions.
    // TODO(hedaoyuan) Can be considered in the runInitFunctions,
    // but to ensure that it is the first to initialize.
    hl_start();
    hl_init(FLAGS_gpu_id);
  }

  version::printVersion();
  checkCPUFeature().check();
  runInitFunctions();
}
示例#13
0
// Per-thread CPU setup: runs the thread-spawn debug hook, configures the FPU
// (non-static builds only) and, when the CPU reports SSE, enables FTZ and --
// if supported -- DAZ.
// NOTE(review): the closing brace of this function is not visible in this excerpt.
void _initialize_cpu_thread	()
{
	debug_on_thread_spawn	();
#ifndef XRCORE_STATIC
	// fpu & sse 
	FPU::m24r	();
#endif  // XRCORE_STATIC
	if (CPU::ID.feature&_CPU_FEATURE_SSE)	{
		// older single-call variant, kept for reference:
		//_mm_setcsr ( _mm_getcsr() | (_MM_FLUSH_ZERO_ON+_MM_DENORMALS_ZERO_ON) );
		_MM_SET_FLUSH_ZERO_MODE			(_MM_FLUSH_ZERO_ON);
		if (_denormals_are_zero_supported)	{
			// Setting DAZ is wrapped in SEH: if it raises, the support flag is
			// cleared so the attempt is not repeated.
			__try	{
				_MM_SET_DENORMALS_ZERO_MODE	(_MM_DENORMALS_ZERO_ON);
			} __except(EXCEPTION_EXECUTE_HANDLER) {
				_denormals_are_zero_supported	= FALSE;
			}
		}
	}
示例#14
0
// Background thread body: whenever the delay parameters have changed, sums the
// per-filter phase and group terms at NUM_RESP_W evenly spaced values of w and
// stores the sums in core.phasedelay / core.groupdelay.
void TimeLagFilterCore::DlyCalcThread::run(){
    //Treat denormalized floats as zero on this thread
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);

    while(!threadShouldExit()){
        //Sleep until notified unless a change is already pending
        if(!core.paramsChangedDelay) wait(-1);
        if(threadShouldExit()) return;

        //The parameters are read under the ct read lock for the whole pass
        const ScopedReadLock ctGuard(core.ctLock);

        //Step between evaluated values of w, starting at 0
        const float step = core.getMaxCtr() / ResponseGraph::NUM_RESP_W;
        float omega = 0.0f;
        for(int point = 0; point < ResponseGraph::NUM_RESP_W; ++point){
            const float omegaSq = omega * omega;
            const float twoOmega = 2.0f * omega;
            float phaseTotal = 0.0f;
            float groupTotal = 0.0f;
            for(int filt = 0; filt < core.num_filters; ++filt){
                const float ctr = core.ct_write[filt].center;
                const float width = core.ct_write[filt].bw;
                const float ctrSq = ctr * ctr;
                const float re = ctrSq - omegaSq;
                const float im = twoOmega * width * ctr;
                //Each term is rounded to float before it is accumulated
                const float phaseTerm = -2.0f * atan2(im, re);
                const float groupTerm = -4.0f * width * ctr * (ctrSq - (core.getMaxBW() * omegaSq)) / (re*re + im*im);
                phaseTotal += phaseTerm;
                groupTotal += groupTerm;
            }
            core.phasedelay[point] = phaseTotal;
            core.groupdelay[point] = groupTotal;
            omega += step;
        }
        core.paramsChangedDelay = false;
    }
}
示例#15
0
void MixerWorkerThread::run()
{
// set denormal protection for this thread
#ifdef __SSE3__
/* DAZ flag */
	_MM_SET_DENORMALS_ZERO_MODE( _MM_DENORMALS_ZERO_ON );
#endif
#ifdef __SSE__
/* FTZ flag */
	_MM_SET_FLUSH_ZERO_MODE( _MM_FLUSH_ZERO_ON );
#endif	
	QMutex m;
	while( m_quit == false )
	{
		m.lock();
		queueReadyWaitCond->wait( &m );
		globalJobQueue.run();
		m.unlock();
	}
}
示例#16
0
void RayEngine::embreeInit() {

	cout << "Starting Embree..." << endl;

	// Init library
	Embree.device = rtcNewDevice(NULL);
	_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
	_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);

	// Generate texture
	glGenTextures(1, &Embree.texture);
	glBindTexture(GL_TEXTURE_2D, Embree.texture);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
	glBindTexture(GL_TEXTURE_2D, 0);

	// Init scenes
	userData = this;
	for (uint i = 0; i < scenes.size(); i++)
		scenes[i]->embreeInit(Embree.device);

}
示例#17
0
/* exported for Rembedded.h */
/* Configures the floating-point environment when `start` is true and puts the
   platform settings back when it is false. Each step is compiled in only on
   the platforms/compilers that need it. */
void fpu_setup(Rboolean start)
{
    if (!start) {
	/* shutdown path */
#ifdef __FreeBSD__
	fpsetmask(~0);
#endif

#ifdef NEED___SETFPUCW
	__setfpucw(_FPU_DEFAULT);
#endif
	return;
    }

    /* startup path */
#ifdef __FreeBSD__
    fpsetmask(0);
#endif

#ifdef NEED___SETFPUCW
    __setfpucw(_FPU_IEEE);
#endif
    /* Intel compiler on x86 only: FTZ and DAZ are explicitly switched off */
#if (defined(__i386) || defined(__x86_64)) && defined(__INTEL_COMPILER) && __INTEL_COMPILER > 800
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF);
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_OFF);
#endif
}
示例#18
0
  /* main function in embree namespace: parses options, loads scene and
     keyframes, initializes the ray tracing core and runs the requested modes */
  int main(int argc, char** argv)
  {
    /* DAZ and FTZ in the MXCSR control/status register, set for best performance */
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);

    /* parse the command line through a stream */
    Ref<ParseStream> stream = new ParseStream(new CommandLineStream(argc, argv));
    parseCommandLine(stream, FileName());

    /* no scene given: parse the default cornell box command file instead */
    if (filename.ext() == "") {
      FileName file = FileName::executableFolder() + FileName("models/cornell_box.ecs");
      parseCommandLine(new ParseStream(new LineCommentFilter(file, "#")), file.path());
    }

    /* build the configuration string: threads, benchmark flag, subdiv mode */
    if (g_numThreads) {
      g_rtcore += ",threads=" + std::to_string((long long)g_numThreads);
    }
    if (g_numBenchmarkFrames) {
      g_rtcore += ",benchmark=1";
    }
    g_rtcore += g_subdiv_mode;

    /* load scene, dispatching on the lower-cased file extension */
    const std::string sceneExt = strlwr(filename.ext());
    if (sceneExt == std::string("obj"))
    {
      const bool useSubdiv = g_subdiv_mode != "";
      if (!useSubdiv) {
        loadOBJ(filename,one,g_obj_scene);
      }
      else {
        std::cout << "enabling subdiv mode" << std::endl;
        loadOBJ(filename,one,g_obj_scene,true);
      }
    }
    else if (sceneExt == std::string("xml")) {
      loadXML(filename,one,g_obj_scene);
    }
    else if (sceneExt != "") {
      THROW_RUNTIME_ERROR("invalid scene type: "+sceneExt);
    }

    /* load keyframes when a keyframe list was given */
    if (keyframeList.str() != "") {
      loadKeyFrameAnimation(keyframeList);
    }

    /* initialize ray tracing core */
    init(g_rtcore.c_str());

    /* select the shader by replaying the matching function key;
       shader values not listed here trigger nothing */
    switch (g_shader) {
    case SHADER_EYELIGHT:      key_pressed(GLUT_KEY_F2); break;
    case SHADER_UV:            key_pressed(GLUT_KEY_F4); break;
    case SHADER_NG:            key_pressed(GLUT_KEY_F5); break;
    case SHADER_GEOMID:        key_pressed(GLUT_KEY_F6); break;
    case SHADER_GEOMID_PRIMID: key_pressed(GLUT_KEY_F7); break;
    };

    /* convert triangle meshes to subdiv meshes */
    if (g_only_subdivs) {
      g_obj_scene.convert_to_subdiv();
    }

    /* send model */
    set_scene(&g_obj_scene);

    /* send keyframes, if any were loaded */
    if (g_keyframes.size()) {
      set_scene_keyframes(&*g_keyframes.begin(),g_keyframes.size());
    }

    /* benchmark mode */
    if (g_numBenchmarkFrames) {
      renderBenchmark(outFilename);
    }

    /* render to disk */
    const bool writeImage = outFilename.str() != "";
    if (writeImage) {
      renderToFile(outFilename);
    }

    /* interactive mode */
    if (!g_interactive)
      return 0;

    initWindowState(argc,argv,tutorialName, g_width, g_height, g_fullscreen);
    enterWindowRunLoop(g_anim_mode);
    return 0;
  }
示例#19
0
  /* Constructs the single tutorial application instance.
   *
   * Initializes all members to their defaults (512x512 image and window,
   * default shader, interactive and windowed), records `this` as the global
   * instance, sets the FTZ/DAZ flags and registers the command line options
   * common to all tutorials.
   *
   * tutorialName: stored in the tutorialName member.
   * features:     forwarded to Application; when FEATURE_STREAM is set the
   *               "mode" option is registered as well. */
  TutorialApplication::TutorialApplication (const std::string& tutorialName, int features)

    : Application(features),
      tutorialName(tutorialName),

      shader(SHADER_DEFAULT),

      width(512),
      height(512),
      pixels(nullptr),

      outputImageFilename(""),

      skipBenchmarkFrames(0),
      numBenchmarkFrames(0),
      numBenchmarkRepetitions(1),

      interactive(true),
      fullscreen(false),

      window_width(512),
      window_height(512),
      windowID(0),

      time0(getSeconds()),
      debug_int0(0),
      debug_int1(0),

      mouseMode(0),
      clickX(0), clickY(0),
      speed(1.0f),
      moveDelta(zero),
      command_line_camera(false),
      print_frame_rate(false),
      avg_render_time(64,1.0),
      avg_frame_time(64,1.0),
      avg_mrayps(64,1.0),
      print_camera(false),

      debug0(0),
      debug1(0),
      debug2(0),
      debug3(0),

      iflags_coherent(RTC_INTERSECT_COHERENT),
      iflags_incoherent(RTC_INTERSECT_INCOHERENT)
  {
    /* only a single instance of this class is supported */
    assert(instance == nullptr);
    instance = this;

    /* for best performance set FTZ and DAZ flags in MXCSR control and status register */
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);

    /* read further options from a file, resolved relative to the current path;
       '#' starts a comment in that file */
    registerOption("c", [this] (Ref<ParseStream> cin, const FileName& path) {
        FileName file = path + cin->getFileName();
        parseCommandLine(new ParseStream(new LineCommentFilter(file, "#")), file.path());
      }, "-c <filename>: parses command line option from <filename>");

    /* output image; giving one also switches interactive mode off */
    registerOption("o", [this] (Ref<ParseStream> cin, const FileName& path) {
        outputImageFilename = cin->getFileName();
        interactive = false;
      }, "-o <filename>: output image filename");

    /* camera settings; each of them marks the camera as set from the command line */
    registerOption("vp", [this] (Ref<ParseStream> cin, const FileName& path) {
        camera.from = cin->getVec3fa();
        command_line_camera = true;
      }, "--vp <float> <float> <float>: camera position");

    registerOption("vi", [this] (Ref<ParseStream> cin, const FileName& path) {
        camera.to = cin->getVec3fa();
        command_line_camera = true;
      }, "--vi <float> <float> <float>: camera lookat position");

    /* the direction is applied relative to the camera position known at parse time */
    registerOption("vd", [this] (Ref<ParseStream> cin, const FileName& path) {
        camera.to = camera.from + cin->getVec3fa();
        command_line_camera = true;
      }, "--vd <float> <float> <float>: camera direction vector");

    registerOption("vu", [this] (Ref<ParseStream> cin, const FileName& path) {
        camera.up = cin->getVec3fa();
        command_line_camera = true;
      }, "--vu <float> <float> <float>: camera up vector");

    registerOption("fov", [this] (Ref<ParseStream> cin, const FileName& path) {
        camera.fov = cin->getFloat();
        command_line_camera = true;
      }, "--fov <float>: vertical field of view");

    /* framebuffer settings */
    registerOption("size", [this] (Ref<ParseStream> cin, const FileName& path) {
        width = cin->getInt();
        height = cin->getInt();
      }, "--size <width> <height>: sets image size");

    registerOption("fullscreen", [this] (Ref<ParseStream> cin, const FileName& path) {
        fullscreen = true;
      }, "--fullscreen: starts in fullscreen mode");

    /* benchmark settings: the third value (repetitions) is optional and is only
       read when the next token is non-empty and does not start with '-' */
    registerOption("benchmark", [this] (Ref<ParseStream> cin, const FileName& path) {
        skipBenchmarkFrames = cin->getInt();
        numBenchmarkFrames  = cin->getInt();
        if (cin->peek() != "" && cin->peek()[0] != '-')
          numBenchmarkRepetitions = cin->getInt();
        interactive = false;
        rtcore += ",benchmark=1,start_threads=1";
      }, "--benchmark <N> <M> <R>: enabled benchmark mode, builds scene, skips N frames, renders M frames, and repeats this R times");

    /* benchmark-style run without a window: 2048 frames, none skipped */
    registerOption("nodisplay", [this] (Ref<ParseStream> cin, const FileName& path) {
        skipBenchmarkFrames = 0;
        numBenchmarkFrames  = 2048;
        interactive = false;
      }, "--nodisplay: enabled benchmark mode, continously renders frames");

    /* console diagnostics */
    registerOption("print-frame-rate", [this] (Ref<ParseStream> cin, const FileName& path) {
        print_frame_rate = true;
      }, "--print-frame-rate: prints framerate for each frame on console");

     registerOption("print-camera", [this] (Ref<ParseStream> cin, const FileName& path) {
         print_camera = true;
      }, "--print-camera: prints camera for each frame on console");

     /* internal debugging values debug0..debug3 */
     registerOption("debug0", [this] (Ref<ParseStream> cin, const FileName& path) {
         debug0 = cin->getInt();
       }, "--debug0: sets internal debugging value");

     registerOption("debug1", [this] (Ref<ParseStream> cin, const FileName& path) {
         debug1 = cin->getInt();
       }, "--debug1: sets internal debugging value");

     registerOption("debug2", [this] (Ref<ParseStream> cin, const FileName& path) {
         debug2 = cin->getInt();
       }, "--debug2: sets internal debugging value");

     registerOption("debug3", [this] (Ref<ParseStream> cin, const FileName& path) {
         debug3 = cin->getInt();
       }, "--debug3: sets internal debugging value");

    /* startup shader selection; "cycles" additionally reads a float into scale,
       unknown names throw std::runtime_error */
    registerOption("shader", [this] (Ref<ParseStream> cin, const FileName& path) {
        std::string mode = cin->getString();
        if      (mode == "default" ) shader = SHADER_DEFAULT;
        else if (mode == "eyelight") shader = SHADER_EYELIGHT;
        else if (mode == "occlusion") shader = SHADER_OCCLUSION;
        else if (mode == "uv"      ) shader = SHADER_UV;
        else if (mode == "texcoords") shader = SHADER_TEXCOORDS;
        else if (mode == "texcoords-grid") shader = SHADER_TEXCOORDS_GRID;
        else if (mode == "Ng"      ) shader = SHADER_NG;
        else if (mode == "cycles"  ) { shader = SHADER_CYCLES; scale = cin->getFloat(); }
        else if (mode == "geomID"  ) shader = SHADER_GEOMID;
        else if (mode == "primID"  ) shader = SHADER_GEOMID_PRIMID;
        else if (mode == "ao"      ) shader = SHADER_AMBIENT_OCCLUSION;
        else throw std::runtime_error("invalid shader:" +mode);
      },
      "--shader <string>: sets shader to use at startup\n"
      "  default: default tutorial shader\n"
      "  eyelight: eyelight shading\n"
      "  occlusion: occlusion shading\n"
      "  uv: uv debug shader\n"
      "  texcoords: texture coordinate debug shader\n"
      "  texcoords-grid: grid texture debug shader\n"
      "  Ng: visualization of shading normal\n"
      "  cycles <float>: CPU cycle visualization\n"
      "  geomID: visualization of geometry ID\n"
      "  primID: visualization of geometry and primitive ID\n"
      "  ao: ambient occlusion shader");

    /* the rendering mode option exists only for tutorials with stream support */
    if (features & FEATURE_STREAM)
    {
      /* register parsing of stream mode; writes the global g_mode,
         unknown names throw std::runtime_error */
      registerOption("mode", [] (Ref<ParseStream> cin, const FileName& path) {
          std::string mode = cin->getString();
          if      (mode == "normal") g_mode = MODE_NORMAL;
          else if (mode == "stream") g_mode = MODE_STREAM;
          else throw std::runtime_error("invalid mode:" +mode);
        },
        "--mode: sets rendering mode\n"
        "  normal  : normal mode\n"
        "  stream  : stream mode\n");
    }

    /* ray coherency hints: each option forces BOTH the coherent and the
       incoherent flag sets (members and globals) to the same hint */
    registerOption("coherent", [this] (Ref<ParseStream> cin, const FileName& path) {
        g_iflags_coherent   = iflags_coherent   = RTC_INTERSECT_COHERENT;
        g_iflags_incoherent = iflags_incoherent = RTC_INTERSECT_COHERENT;
      }, "--coherent: force using RTC_INTERSECT_COHERENT hint when tracing rays");

    registerOption("incoherent", [this] (Ref<ParseStream> cin, const FileName& path) {
        g_iflags_coherent   = iflags_coherent   = RTC_INTERSECT_INCOHERENT;
        g_iflags_incoherent = iflags_incoherent = RTC_INTERSECT_INCOHERENT;
      }, "--incoherent: force using RTC_INTERSECT_INCOHERENT hint when tracing rays");
  }
示例#20
0
文件: main.c 项目: tmpvar/cpu-voxels
int main(void)
{

  _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
  _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);

  int width = 800, height = 600;
  GLFWwindow* window;
  glfwSetErrorCallback(error_callback);
  if (!glfwInit())
    exit(EXIT_FAILURE);
  window = glfwCreateWindow(width, height, "cpu-voxels", NULL, NULL);
  if (!window)
  {
    glfwTerminate();
    return 1;
  }
  glfwMakeContextCurrent(window);
  glfwSwapInterval(0);
  glfwSetKeyCallback(window, key_callback);
  glfwSetMouseButtonCallback(window, mouse_button_callback);
  glfwSetCursorPosCallback(window, mouse_move_callback);
  glfwSetKeyCallback(window, key_callback);

  vec3 eye = vec3_create(0.0f, 0.0f, VOXEL_BRICK_SIZE * 4);
  vec3 center = vec3f(0.0f);
  vec3 up = vec3_create(0.0, 1.0, 0.0 );

  orbit_camera_init(eye, center, up);

  // TODO: handle resize

  int dw, dh;
  glfwGetFramebufferSize(window, &dw, &dh);
  int stride = 3;
  int total = dw*dh*stride;
  uint8_t *data = malloc(total);

  vec3 ro; //, rd;
  mat4 m4inverted, view;
  mat4 projection;
  mat4_perspective(
    projection,
    M_PI/4.0,
    (float)width/(float)height,
    0.1,
    1000.0
  );
  GLuint texture[1];

#ifdef ENABLE_THREADS
  screen_area areas[TOTAL_THREADS];
  threadpool thpool = thpool_init(TOTAL_THREADS);
#else
  screen_area areas[1];
#endif

  glGenTextures(1, texture);
  float start = glfwGetTime();
  int fps = 0;
  voxel_brick my_first_brick = voxel_brick_create();
  // TODO: make this work when the brick lb corner is not oriented at 0,0,0
  voxel_brick_position(my_first_brick, vec3f(0.0f));
  voxel_brick_fill(my_first_brick, &brick_fill);

  while (!glfwWindowShouldClose(window)) {
    if (glfwGetKey(window, GLFW_KEY_LEFT) == GLFW_PRESS) {
      orbit_camera_rotate(0, 0, -.1, 0);
    }

    if (glfwGetKey(window, GLFW_KEY_RIGHT) == GLFW_PRESS) {
      orbit_camera_rotate(0, 0, .1, 0);
    }

    if (glfwGetKey(window, GLFW_KEY_UP) == GLFW_PRESS) {
      orbit_camera_rotate(0, 0, 0, .1);
    }

    if (glfwGetKey(window, GLFW_KEY_DOWN) == GLFW_PRESS) {
      orbit_camera_rotate(0, 0, 0, -.1);
    }

    glfwGetFramebufferSize(window, &width, &height);
    float now = glfwGetTime();
    if (now - start > 1) {
      unsigned long long total_rays = (fps * width * height);
      printf("fps: %i (%f Mrays/s)@%ix%i - %i threads\n", fps, total_rays/1000000.0, width, height, TOTAL_THREADS);
      start = now;
      fps = 0;
    }
    fps++;


    orbit_camera_view(view);
    ro = mat4_get_eye(view);

    mat4_mul(m4inverted, projection, view);
    mat4_invert(m4inverted, m4inverted);

    // compute 3 points so that we can interpolate instead of unprojecting
    // on every point
    vec3 rda, rdb, planeYPosition, dcol, drow;

    vec3 t0 = vec3_create(0, 0, 0), tx = vec3_create(1, 0, 0), ty = vec3_create(0, 1, 0);
    vec4 viewport = { 0, 0, width, height };

    rda = orbit_camera_unproject(t0, viewport, m4inverted);
    rdb = orbit_camera_unproject(tx, viewport, m4inverted);
    planeYPosition = orbit_camera_unproject(ty, viewport, m4inverted);
    dcol = planeYPosition - rda;
    drow = rdb - rda;

    int i=0, bh = height;
#ifdef ENABLE_THREADS
    bh = (height/TOTAL_THREADS);

    for (i; i<TOTAL_THREADS; i++) {
#endif
      areas[i].dcol = dcol;
      areas[i].drow = drow;
      areas[i].pos = planeYPosition;
      areas[i].ro = ro;
      areas[i].x = 0;
      areas[i].y = i*bh;
      areas[i].width = width;
      areas[i].height = areas[i].y + (int)(bh);
      areas[i].screen_height = (int)(height);
      areas[i].stride = stride;
      areas[i].data = data;
      areas[i].render_id = i;
      areas[i].brick = my_first_brick;
#ifdef ENABLE_THREADS
      thpool_add_work(thpool, (void *)render_screen_area, (void *)(&areas[i]));
    }

    thpool_wait(thpool);
#else
    render_screen_area((void *)(&areas[i]));
#endif

#ifdef RENDER
    glViewport(0, 0, width, height);
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    glDisable(GL_CULL_FACE);
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();

    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();

    glMatrixMode(GL_TEXTURE);
    glLoadIdentity();
    glScalef(1.0f, -1.0f, 1.0f);

    glEnable(GL_TEXTURE_2D);

    glBindTexture(GL_TEXTURE_2D, texture[0]);
    glTexImage2D(GL_TEXTURE_2D, 0, 3, width, height, 0, GL_RGB, GL_UNSIGNED_BYTE, data);

    glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MIN_FILTER,GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MAG_FILTER,GL_LINEAR);

    glBegin(GL_QUADS);
      glTexCoord2f(0.0f, 0.0f); glVertex2f( -1, -1);
      glTexCoord2f(1.0f, 0.0f); glVertex2f(  1, -1);
      glTexCoord2f(1.0f, 1.0f); glVertex2f(  1,  1);
      glTexCoord2f(0.0f, 1.0f); glVertex2f( -1,  1);
    glEnd();

    glfwSwapBuffers(window);

    glDeleteTextures(1, &texture[0]);
#endif

    glfwPollEvents();
  }
  glfwDestroyWindow(window);
  glfwTerminate();
  exit(EXIT_SUCCESS);
}
/* Process entry point.
 *
 * Sets the FTZ/DAZ flags, parses the options, then either resumes from a
 * loaded loom (checkpoint) or boots a fresh one, installs the signal and
 * stack-overflow handlers, and finally runs either one admin task or the
 * main loop. Returns 1 on a usage error and 0 on normal completion; several
 * failure paths call exit(1) directly.
 */
c3_i
main(c3_i   argc,
     c3_c** argv)
{
  // mark the logging state as not yet initialized
  //
  u2K->inited_t = c3_false;

  c3_w kno_w;

  _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
  _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);

  //  Parse options; print usage and fail when they are invalid.
  //
  if ( u2_no == _main_getopt(argc, argv) ) {
    u2_ve_usage(argc, argv);
    return 1;
  }


  u2_ve_sysopt();

  printf("~\n");
  printf("welcome.\n");
  printf("vere: urbit home is %s\n", u2_Host.cpu_c);
  printf("vere: hostname is %s\n", u2_Host.ops_u.nam_c);

  //  Only a message is printed here; nothing in this function daemonizes.
  //
  if ( u2_yes == u2_Host.ops_u.dem && u2_no == u2_Host.ops_u.bat ) {
    printf("Starting daemon\n");
  }

  //  Seed prng. Don't panic -- just for fuzz testing and election timeouts.
  //
  srand(getpid());

  //  Instantiate process globals.
  {
    u2_wr_check_init(u2_Host.cpu_c);
    u2_Host.xit_i = 0;

    //  Resume path: taken when the `nuu` option is off AND the loom loads
    //  successfully.
    //
    if ( (u2_no == u2_Host.ops_u.nuu) &&
          (u2_yes == u2_loom_load()) )
    {
      u2_Host.wir_r = u2_ray_of(0, 0);
      u2_Wire = u2_Host.wir_r;

      u2_Host.arv_u = u2_Arv;

      u2_Arv->ova.egg_u = u2_Arv->ova.geg_u = 0;

      u2_lo_grab("init", u2_none);

      //  Horrible ancient stuff.
      //
      //  (takes the kernel number from the loaded state)
      //
      kno_w = u2_Host.arv_u->kno_w;
      u2_Host.kno_w = kno_w;

      u2_ho_push();
    }
    else {
      //  Fresh path: boot a new loom and initialize the wire.
      //
      u2_loom_boot();
      u2_Host.wir_r = u2_wr_init(c3__rock, u2_ray_of(0, 0), u2_ray_of(1, 0));
      u2_Wire = u2_Host.wir_r;

      u2_Host.arv_u = u2_Arv;
    }
  }

  //  If we have not loaded from checkpoint, build kernel.
  //
  //  The first branch handles a nonzero ent_d (presumably: state was loaded
  //  from a checkpoint -- confirm) and only refreshes time/number and prints
  //  the time; the kernel is built in the else branch.
  //
  if ( 0 != u2_Host.arv_u->ent_d ) {
    u2_reck_time(u2_Host.arv_u);
    u2_reck_numb(u2_Host.arv_u);
    {
      c3_c* dyt_c = u2_cr_string(u2_Host.arv_u->wen);

      printf("time: %s\n", dyt_c);
      free(dyt_c);
    }
  }
  else {
    //  Set outside bail trap.  Should not be used, but you never know...
    //
    if ( 0 != u2_cm_trap() ) {
      u2_ve_panic(argc, argv);
    }
    else {
      //  Set boot and goal stages.
      //
      //  NOTE(review): kno_w is computed here (falling back to DefaultKernel
      //  for 0 or values above 255) but is not read afterwards; the
      //  assignment below uses u2_Host.ops_u.kno_w directly -- confirm
      //  whether kno_w was meant to be used.
      {
        if ( (0 == u2_Host.ops_u.kno_w) || (u2_Host.ops_u.kno_w > 255) ) {
          kno_w = DefaultKernel;
        } else {
          kno_w = u2_Host.ops_u.kno_w;
        }
      }

      //  Load the system.
      //
      {
        u2_Host.kno_w = u2_Host.ops_u.kno_w;

        u2_reck_boot(u2_Host.arv_u);
      }
      u2_cm_done();
    }
  }

  //  Install signal handlers and set buffers.
  //
  //  Note that we use the sigmask-restoring variant.  Essentially, when
  //  we get a signal, we force the system back into the just-booted state.
  //  If anything goes wrong during boot (above), it's curtains.
  {
    //  A nonzero return means we arrived here through a jump to Signal_buf:
    //  report the cause, restore the default handlers, dump state and exit.
    //
    if ( 0 != sigsetjmp(Signal_buf, 1) ) {
      switch ( Sigcause ) {
        case sig_overflow: printf("[stack overflow]\r\n"); break;
        case sig_interrupt: printf("[interrupt]\r\n"); break;
        default: printf("[signal error!]\r\n"); break;
      }
      Sigcause = sig_none;

      signal(SIGINT, SIG_DFL);
      stackoverflow_deinstall_handler();

      //  Print the trace, do a GC, etc.
      //
      //  This is half-assed at present, so we exit.
      //
      u2_lo_sway(0, u2k(u2_wire_tax(u2_Wire)));

      u2_lo_bail(u2_Host.arv_u);

      exit(1);
    }
#if 1
    //  Normal path: install the stack-overflow handler on the Sigstk stack,
    //  route SIGINT to interrupt_handler and ignore SIGIO.
    //
    if ( -1 == stackoverflow_install_handler
        (overflow_handler, Sigstk, SIGSTKSZ) )
    {
      fprintf(stderr, "overflow_handler: install failed\n");
      exit(1);
    }
    signal(SIGINT, interrupt_handler);
    signal(SIGIO, SIG_IGN);
#endif
  }

  u2_lo_grab("main", u2_none);

  // booted in admin mode: do a task, then exit
  // booted in user mode: do command loop
  //
  // admin commands: "edmp", "etok", "ktoe" each run one task; "kcnf" enters
  // the normal loop; anything else is rejected with exit(1)

  if (u2_Host.ops_u.adm_c != 0) {
    if      (strcmp(u2_Host.ops_u.adm_c, "edmp") ==0) { u2_egz_admin_dump_egz(); }
    else if (strcmp(u2_Host.ops_u.adm_c, "etok") ==0) { u2_kafka_admin_egz_to_kafka(); }
    else if (strcmp(u2_Host.ops_u.adm_c, "ktoe") ==0) { u2_kafka_admin_kafka_to_egz(); }
    else if (strcmp(u2_Host.ops_u.adm_c, "kcnf") ==0) { u2_lo_loop(); } // do it in the app
    else                                              { fprintf(stderr, "unsupported admin mode command %s\n", u2_Host.ops_u.adm_c); exit(1); }
  } else {
    u2_lo_loop();
  }

  return 0;
}
示例#22
0
/// Turn on flush-to-zero (FTZ) and denormals-are-zero (DAZ) in the MXCSR
/// control/status register so denormal floats are treated as zero.
inline void AVOIDDENORMALS()
{
  // The *_ON constants are exactly the FTZ and DAZ bits of MXCSR, so a
  // single read-modify-write that ORs both in leaves the register in the
  // same state as setting each mode separately.
  const unsigned int ftz_and_daz = _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON;
  _mm_setcsr(_mm_getcsr() | ftz_and_daz);
}
示例#23
0
文件: run_nmf.c 项目: duhaime/pynmf
// Run iterative multiplicative updates on W and H, following the matlab
// algorithm quoted inside the loop:
//   Z = X./(W*H+eps);  H = H.*(W'*Z)./(repmat(sum(W)',1,F));
//   Z = X./(W*H+eps);  W = W.*(Z*H')./(repmat(sum(H,2)',N,1));
//
// Parameters:
//   W, H     - factor matrices; element_multiply() writes the updated
//              factors back into them (presumably `matrix` carries a pointer
//              to its data so the caller sees the result -- confirm against
//              the matrix type).
//   X        - matrix being approximated by W*H.
//   thresh   - every ITER_CHECK iterations the loop stops once the relative
//              change in matrix_div(X,W*H) falls below this value.
//   max_iter - upper bound on the number of update iterations.
//   t        - timer accumulators; indices 0..13 are written here, so the
//              array needs at least that many slots (assumes TIMERS >= 14).
//              Values are accumulated, not reset. May be NULL, in which case
//              a zeroed local scratch array is used instead.
//   verbose  - non-zero prints progress lines and the timer summary.
//
// Timer slots as used below: 0 = whole update loop, 1 = all matrix
// multiplies, 2 = matrix_eps, 3 = element_divide, 4 = element_multiply,
// 5 = sum_rows, 6 = sum_cols, 7 = col_divide, 8 = row_divide,
// 9 = convergence checks, 10..13 = the four individual multiplies.
void update_div(matrix W, matrix H, matrix X, const float thresh, const int max_iter, double *t,int verbose){
    //run iterative multiplicative updates on W,H


    //initialize temp matrices -----------------------
    //matrix to hold W*H
    matrix WH;
    create_matrix(&WH, W.dim[0], H.dim[1], 0.0);

    //matrix to hold X./(W*H+EPS)
    matrix Z;
    create_matrix(&Z, X.dim[0], X.dim[1], 0.0);

    //matrix to hold W'*Z
    matrix WtZ;
    create_matrix(&WtZ, W.dim[1], Z.dim[1], 0.0);

    //matrix to hold Z*H'
    matrix ZHt;
    create_matrix(&ZHt, Z.dim[0], H.dim[0], 0.0);

    //matrix to hold sum(W) [sum cols of W]
    matrix sumW;
    create_matrix(&sumW, 1, W.dim[1] ,0.0);

    //matrix to hold sum(H,2) [sum rows of H]
    matrix sumH2;
    create_matrix(&sumH2, H.dim[0], 1, 0.0);
    
    int i;

    //fallback timer storage, used only when the caller passes t==NULL.
    //It has to live at function scope: t is dereferenced right up to the
    //timer summary at the end, so an array declared inside the if-block
    //below would leave t dangling as soon as that block closed.
    double t_array[TIMERS];
    
    if(t==NULL){
	t = t_array;
	for(i=0;i<TIMERS;i++)
	    t[i] = 0;
    }

    //turn on the FTZ(15) and DAZ(6) bits in the floating point control register
    //FTZ = flush-to-zero, DAZ = denormal-as-zero
    //without these, sgemms slow down significantly as values approach zero
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
    // the following does the same thing (by Waterman)
    /*
    unsigned int mxcsr;
    __asm__ __volatile__ ("stmxcsr (%0)" : : "r"(&mxcsr) : "memory");
    //mxcsr = (mxcsr | (1<<15) | (1<<6)) & ~((1<<11) | (1<<8));
    mxcsr = (mxcsr | (1<<15) | (1<<6)); 
    __asm__ __volatile__ ("ldmxcsr (%0)" : : "r"(&mxcsr));
    */


    //initial error / divergence before any update
    float diff,div,prev_div,change;
    matrix_multiply(W,H,WH,mkl_threads);
    diff = matrix_difference_norm(X,WH, check_threads);
    prev_div = matrix_div(X,WH,check_threads);
    div = prev_div;
    if(verbose)
    {
	printf("OpenMP threads: %i\n",omp_threads);
	printf("i: %4i, error: %6.4f, div: %8.4e\n",0,diff,prev_div);
    }

    t[0] -= get_time();
    for(i=0;i<max_iter;i++){

	//check for convergence, print status
	if(i % ITER_CHECK == 0 && i != 0){
	    double tt = get_time();
	    matrix_multiply(W,H,WH,mkl_threads);
	    diff = matrix_difference_norm(X,WH,check_threads);
	    prev_div = div;
	    div = matrix_div(X,WH,check_threads);
	    change = (prev_div-div)/prev_div;
	    if(verbose)
		printf("i: %4i, error: %6.4f, div: %8.4e, change: %8.5f\n",
			i,diff,div,change);
	    if(change < thresh){
		printf("converged\n");
		//charge this final check to the timer before leaving the loop
		t[9] += get_time()-tt;
		break;
	    }
	    tt = get_time()-tt;
	    t[9] += tt;
	}
	    

	/* matlab algorithm
	   Z = X./(W*H+eps);
	   H = H.*(W'*Z)./(repmat(sum(W)',1,F));

	   Z = X./(W*H+eps);
	   W = W.*(Z*H')./(repmat(sum(H,2)',N,1));
	   */
		
	//
	// UPDATE H -----------------------------
	//

	//WH = W*H
	t[1] -= get_time();
	t[10] -= get_time();
	//matrix_eps(W,eps_threads);
	//matrix_eps(H,eps_threads);
	matrix_multiply(W,H,WH,mkl_threads);
	t[1] += get_time();
	t[10] += get_time();

	//WH = WH+EPS
	t[2] -= get_time();
	matrix_eps(WH,eps_threads);
	t[2] += get_time();

	//Z = X./WH
	t[3] -= get_time();
	element_divide(X,WH,Z,vecdiv_threads);
	t[3] += get_time();


	//sum cols of W into row vector
	t[6] -= get_time();
	sum_cols(W,sumW,sumcols_threads);
	t[6] += get_time();

	//convert sumW to col vector
	//(only the dims of the local struct are swapped; undone at the end
	//of the iteration)
	sumW.dim[0] = sumW.dim[1];
	sumW.dim[1] = 1;

	//WtZ = W'*Z
	t[1] -= get_time();
	t[11] -= get_time();
	matrix_multiply_AtB(W,Z,WtZ,mkl_threads);
	t[1] += get_time();
	t[11] += get_time();

	//WtZ = WtZ./(repmat(sum(W)',1,H.dim[1])
	//[element divide cols of WtZ by sumW']
	t[7] -= get_time();
	col_divide(WtZ,sumW,WtZ,coldiv_threads);
	t[7] += get_time();

	//H = H.*WtZ
	t[4] -= get_time();
	element_multiply(H,WtZ,H,vecmult_threads);
	t[4] += get_time();
	
	
	//
	// UPDATE W ---------------------------
	//

	//WH = W*H
	t[1] -= get_time();
	t[12] -= get_time();
	matrix_multiply(W,H,WH,mkl_threads);
	t[1] += get_time();
	t[12] += get_time();

	//WH = WH+EPS
	t[2] -= get_time();
	matrix_eps(WH,eps_threads);
	t[2] += get_time();

	//Z = X./WH
	t[3] -= get_time();
	element_divide(X,WH,Z,vecdiv_threads);
	t[3] += get_time();

	//sum rows of H into col vector
	t[5] -= get_time();
	sum_rows(H,sumH2,sumrows_threads);
	t[5] += get_time();

	//convert sumH2 to row vector
	sumH2.dim[1] = sumH2.dim[0];
	sumH2.dim[0] = 1;

	//ZHt = Z*H'
	t[1] -= get_time();
	t[13] -= get_time();
	matrix_multiply_ABt(Z,H,ZHt,mkl_threads);
	t[1] += get_time();
	t[13] += get_time();

	//ZHt = ZHt./(repmat(sum(H,2)',W.dim[0],1)
	//[element divide rows of ZHt by sumH2']
	t[8] -= get_time();
	row_divide(ZHt,sumH2,ZHt,rowdiv_threads);
	t[8] += get_time();

	//W = W.*ZHt
	t[4] -= get_time();
	element_multiply(W,ZHt,W,vecmult_threads);
	t[4] += get_time();



	// ------------------------------------

	//reset sumW to row vector
	sumW.dim[1] = sumW.dim[0];
	sumW.dim[0] = 1;
	//reset sumH2 to col vector
	sumH2.dim[0] = sumH2.dim[1];
	sumH2.dim[1] = 1;

	// ---------------------------------------
	
	    
    }

    t[0] += get_time();


    //final error / divergence after the last update
    matrix_multiply(W,H,WH,mkl_threads);
    diff = matrix_difference_norm(X,WH,check_threads);
    prev_div = div;
    div = matrix_div(X,WH,check_threads);
    change = (prev_div-div)/prev_div;
    if(verbose){
	//i still holds the iteration at which the loop stopped
	printf("i: %4i, error: %6.4f, div: %8.4e, change: %8.5f\n",
		i,diff,div,change);


	//timer summary, each slot also shown as a percentage of t[0]
	printf("\n");
	for(i=0;i<TIMERS;i++)
	    printf("t[%i]: %8.3f (%6.2f %%) %s\n",i,t[i],t[i]/t[0]*100,tname[i]);
    }


    //free temporary matrices
    destroy_matrix(&WH);
    destroy_matrix(&Z);
    destroy_matrix(&WtZ);
    destroy_matrix(&ZHt);
    destroy_matrix(&sumW);
    destroy_matrix(&sumH2);


}
EXTERN_C_ENTER

// JNI native entry point. The exported symbol follows the JNI name-mangling
// scheme (presumably the native method _MM_SET_DENORMALS_ZERO_MODE of class
// org.lwjgl.util.simd.SSE3 -- confirm against the Java side).
// Forwards `mode` unchanged to the _MM_SET_DENORMALS_ZERO_MODE macro; no
// validation of `mode` is performed here.
JNIEXPORT void JNICALL Java_org_lwjgl_util_simd_SSE3__1MM_1SET_1DENORMALS_1ZERO_1MODE(JNIEnv *__env, jclass clazz, jint mode) {
	// __env and clazz are not used otherwise in this body; UNUSED_PARAMS is
	// defined elsewhere and presumably silences unused-parameter warnings.
	UNUSED_PARAMS(__env, clazz)
	_MM_SET_DENORMALS_ZERO_MODE(mode);
}