Exemplo n.º 1
0
void Mixer::fifoWriter::run()
{
// set denormal protection for this thread
#ifdef __SSE3__
/* DAZ flag */
	_MM_SET_DENORMALS_ZERO_MODE( _MM_DENORMALS_ZERO_ON );
#endif
#ifdef __SSE__
/* FTZ flag */
	_MM_SET_FLUSH_ZERO_MODE( _MM_FLUSH_ZERO_ON );
#endif

#if 0
#ifdef LMMS_BUILD_LINUX
#ifdef LMMS_HAVE_SCHED_H
	cpu_set_t mask;
	CPU_ZERO( &mask );
	CPU_SET( 0, &mask );
	sched_setaffinity( 0, sizeof( mask ), &mask );
#endif
#endif
#endif

	const fpp_t frames = m_mixer->framesPerPeriod();
	while( m_writing )
	{
		surroundSampleFrame * buffer = new surroundSampleFrame[frames];
		const surroundSampleFrame * b = m_mixer->renderNextBuffer();
		memcpy( buffer, b, frames * sizeof( surroundSampleFrame ) );
		m_fifo->write( buffer );
	}

	m_fifo->write( NULL );
}
Exemplo n.º 2
0
/**
 * Enables flush-to-zero (FTZ) mode for the calling thread when the SSE SIMD
 * path is compiled in (OFX_SIMD_USE_SSE defined). Without that define the
 * function is a no-op.
 */
void ofx_activate_denormal_flush(){

#ifdef OFX_SIMD_USE_SSE
        // Flush denormal results to zero; needed for fast multiplication.
        _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
#endif
}
Exemplo n.º 3
0
// Worker loop that recomputes the filter coefficients (dt_calc) from the
// parameter set (ct_write) whenever core.paramsChanged is raised, and flags
// the result for pick-up through core.sampleSwapDT.
void TimeLagFilterCore::DTCalcThread::run(){
    // Turn off denormal handling for this thread (DAZ + FTZ).
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);

    // Outputs of filtercalculations(); kept outside the loops on purpose.
    float coeffB, coeffC;

    while(!threadShouldExit()){
        // Nothing to do yet: block (presumably until notified; confirm wait(-1) semantics).
        if(!core.paramsChanged){
            wait(-1);
        }
        if(threadShouldExit()){
            return;
        }

        // A previous result has not been consumed yet: retry shortly.
        if(core.sampleSwapDT){
            wait(1);
            continue;
        }

        // Snapshot the written parameters into the calculation buffer.
        {
            const ScopedWriteLock ctGuard(core.ctLock);
            memcpy(core.ct_calc, core.ct_write, core.num_filters * sizeof (CTParams));
            core.paramsChanged = false;
        }

        // Derive the coefficients; dtLock is held until the end of this iteration.
        const ScopedWriteLock dtGuard(core.dtLock);
        for(int idx = 0; idx < core.num_filters; ++idx){
            filtercalculations(core.reduced_fs, core.ct_calc[idx].center, core.ct_calc[idx].bw, &coeffB, &coeffC);
            core.dt_calc[idx].b = coeffB;
            core.dt_calc[idx].c = coeffC;
        }
        core.sampleSwapDT = true;
    }
}
Exemplo n.º 4
0
  /* main function in embree namespace */
  int main(int argc, char** argv) 
  {
    /* for best performance set FTZ and DAZ flags in MXCSR control and status register */
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);

    /* set default camera */
    g_camera.from = Vec3fa(2.5f,2.5f,2.5f);
    g_camera.to   = Vec3fa(0.0f,0.0f,0.0f);

    /* create stream for parsing */
    Ref<ParseStream> stream = new ParseStream(new CommandLineStream(argc, argv));

    /* parse command line */  
    parseCommandLine(stream, FileName());
    if (g_numThreads) 
      g_rtcore += ",threads=" + toString(g_numThreads);

    /* initialize ray tracing core */
    init(g_rtcore.c_str());

    /* render to disk */
    if (outFilename.str() != "") {
      renderToFile(outFilename);
      return 0;
    } 

    /* initialize GLUT */
    initWindowState(argc,argv,tutorialName, g_width, g_height, g_fullscreen);
    
    /* enter the GLUT run loop */
    enterWindowRunLoop();

    return 0;
  }
Exemplo n.º 5
0
  /* main function in embree namespace */
  int main(int argc, char** argv) 
  {
    /* for best performance set FTZ and DAZ flags in MXCSR control and status register */
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);

    /* initialize ray tracing core and force bvh4.triangle4v hierarchy for triangles */
    rtcInit("tri_accel=bvh4.triangle4v");
    
    /* set error handler */
    rtcSetErrorFunction(error_handler);
    
    /* create scene */
    g_scene = rtcNewScene(RTC_SCENE_STATIC,RTC_INTERSECT1);
    addCube(g_scene,Vec3fa(-1,0,0));
    addCube(g_scene,Vec3fa(1,0,0));
    addCube(g_scene,Vec3fa(0,0,-1));
    addCube(g_scene,Vec3fa(0,0,1));
    addHair(g_scene);
    addGroundPlane(g_scene);
    rtcCommit (g_scene);

    /* print triangle BVH */
    print_bvh(g_scene);

    /* cleanup */
    rtcDeleteScene (g_scene);
    rtcExit();
    return 0;
  }
Exemplo n.º 6
0
/* main function in embree namespace */
int main(int argc, char** argv)
{
    /* for best performance set FTZ and DAZ flags in MXCSR control and status register */
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);

    /* create stream for parsing */
    Ref<ParseStream> stream = new ParseStream(new CommandLineStream(argc, argv));

    /* parse command line */
    parseCommandLine(stream, FileName());

    /* load default scene if none specified */
    if (filename.ext() == "") {
        FileName file = FileName::executableFolder() + FileName("models/cornell_box.ecs");
        parseCommandLine(new ParseStream(new LineCommentFilter(file, "#")), file.path());
    }

    /* configure number of threads */
    if (g_numThreads)
        g_rtcore += ",threads=" + std::to_string((long long)g_numThreads);
    if (g_numBenchmarkFrames)
        g_rtcore += ",benchmark=1";

    g_rtcore += g_subdiv_mode;

    /* load scene */
    if (strlwr(filename.ext()) == std::string("obj")) {
        g_scene->add(loadOBJ(filename,g_subdiv_mode != ""));
    }
    else if (strlwr(filename.ext()) == std::string("xml")) {
        g_scene->add(loadXML(filename,one));
    }
    else if (filename.ext() != "")
        THROW_RUNTIME_ERROR("invalid scene type: "+strlwr(filename.ext()));

    /* initialize ray tracing core */
    init(g_rtcore.c_str());

    /* send model */
    g_obj_scene.add(g_scene.dynamicCast<SceneGraph::Node>(),g_instancing_mode);
    g_scene = nullptr;
    set_scene(&g_obj_scene);

    /* benchmark mode */
    if (g_numBenchmarkFrames)
        renderBenchmark(outFilename);

    /* render to disk */
    if (outFilename.str() != "")
        renderToFile(outFilename);

    /* interactive mode */
    if (g_interactive) {
        initWindowState(argc,argv,tutorialName, g_width, g_height, g_fullscreen);
        enterWindowRunLoop(g_anim_mode);
    }

    return 0;
}
  /* Entry point: prints the extra command line options, parses the command
     line, initializes Embree and then renders to a file and/or runs the
     interactive window, depending on what was requested. */
  int main(int argc, char **argv) 
  {
    /* FTZ and DAZ flags in the MXCSR control and status register give best performance */
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);

    std::cout << " === Possible cmd line options: -pregenerate, -cache === " << std::endl;

    /* default camera placement */
    g_camera.from = Vec3fa(1.5f,1.5f,-1.5f);
    g_camera.to   = Vec3fa(0.0f,0.0f,0.0f);

    /* process the program arguments */
    parseCommandLine(new ParseStream(new CommandLineStream(argc, argv)), FileName());

    /* extend the Embree configuration string: thread count first, then the subdivision mode */
    if (g_numThreads) {
      g_rtcore += ",threads=" + std::to_string((long long)g_numThreads);
    }
    g_rtcore += g_subdiv_mode;

    /* bring up Embree */
    init(g_rtcore.c_str());

    /* file output */
    if (outFilename.str() != "") {
      renderToFile(outFilename);
    }

    /* interactive window */
    if (g_interactive) {
      initWindowState(argc,argv,tutorialName, g_width, g_height, g_fullscreen);
      enterWindowRunLoop();
    }
    return 0;
  }
Exemplo n.º 8
0
/* Registers the audio bus memory with the audio backend, enables denormal
 * handling for the calling thread and caches the duration of one block.
 *
 * Bus layout as used here: output channels occupy the first
 * output_channels blocks of world.mAudioBus, input channels follow directly
 * after them; every channel is blocksize samples long. */
void nova_server::prepare_backend(void)
{
    const int blocksize = get_audio_blocksize();
    const int input_channels = get_input_count();
    const int output_channels = get_output_count();

    std::vector<sample*> inputs, outputs;

    /* input ports: located behind the output channels on the bus */
    for (int channel = 0; channel != input_channels; ++channel) {
        inputs.push_back(sc_factory->world.mAudioBus + (blocksize * (output_channels + channel)));
    }
    audio_backend::input_mapping(inputs.begin(), inputs.end());

    /* output ports: located at the start of the bus */
    for (int channel = 0; channel != output_channels; ++channel) {
        outputs.push_back(sc_factory->world.mAudioBus + blocksize * channel);
    }
    audio_backend::output_mapping(outputs.begin(), outputs.end());

#ifdef __SSE__
    /* denormal handling: flush-to-zero via the macro, plus MXCSR bit 0x40
     * (the denormals-are-zero bit) set by hand */
    const int mxcsr_denormal_bit = 0x40;
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
    _mm_setcsr(_mm_getcsr() | mxcsr_denormal_bit);
#endif

    /* duration of one audio block */
    time_per_tick = time_tag::from_samples(blocksize, get_samplerate());
}
Exemplo n.º 9
0
// Constructs the physics world wrapper: initializes the Havok memory system
// and base system on the calling thread, creates the job queue and CPU job
// thread pool, then creates a multithreaded hkpWorld with a constraint
// collision filter and registers the collision agents.
CWorld::CWorld(void)
{
	// Start from a known state before anything is created.
	m_pWorld = 0;
	m_pMemoryRouter = 0;
	m_collisionFilter = 0;
	m_pCell = 0;

	m_suspended = 0;
	
	// Enable flush-to-zero on the constructing thread before Havok is initialized.
	_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
	// Memory system backed by the default malloc allocator; FrameInfo receives 128 * 1024 * 1024.
	m_pMemoryRouter = hkMemoryInitUtil::initDefault( hkMallocAllocator::m_defaultMallocAllocator, hkMemorySystem::FrameInfo( 128 * 1024 * 1024 ) );
	hkBaseSystem::init( m_pMemoryRouter, errorReport );
	// Remember that this thread has been initialized.
	m_threadInit.insert(GetCurrentThreadId());
		
	// Worker count: twice the thread count reported by the hardware query
	// (the initial value of 1 is always overwritten below).
	int numThreads = 1;
	hkHardwareInfo hwInfo;
	hkGetHardwareInfo(hwInfo);
	numThreads = hwInfo.m_numThreads*2;
	LogInfo("System run with %d threads", numThreads);

	// The job queue is sized for one thread more than the pool
	// (presumably the calling thread also takes jobs; TODO confirm).
	hkJobQueueCinfo jobQueueInfo;
	jobQueueInfo.m_jobQueueHwSetup.m_numCpuThreads = numThreads+1;
	m_jobQueue = new hkJobQueue(jobQueueInfo);
	
	hkCpuJobThreadPoolCinfo jobPoolInfo;
	jobPoolInfo.m_numThreads = numThreads;
	m_jobThreadPool = new hkCpuJobThreadPool(jobPoolInfo);

	// World configuration: gravity of -9.8 along Z, broad phase size 1e+6,
	// multithreaded simulation, deactivation disabled.
	hkpWorldCinfo info;
	info.m_gravity.set(0, 0, -9.8);
	//info.m_gravity.set(0, 0, 0);
	info.setBroadPhaseWorldSize(1e+6);
	info.m_broadPhaseType = hkpWorldCinfo::BROADPHASE_TYPE_SAP;
	info.m_broadPhaseBorderBehaviour = hkpWorldCinfo::BROADPHASE_BORDER_DO_NOTHING;
	info.setupSolverInfo(hkpWorldCinfo::SOLVER_TYPE_8ITERS_HARD);
	info.m_simulationType = hkpWorldCinfo::SIMULATION_TYPE_MULTITHREADED;
	info.m_enableDeactivation = false;

	// Everything between markForWrite() and unmarkForWrite() modifies the world.
	m_pWorld = new hkpWorld(info);
	m_pWorld->markForWrite();
	
	// Collision filter: a constraint filter wrapping the project's group filter.
	auto* pFilter = new hkpConstraintCollisionFilter(new MyGroupFilter);
	m_collisionFilter = pFilter;
	m_pWorld->setCollisionFilter(pFilter);
	pFilter->init(m_pWorld);

	hkpWorld::registerWithJobQueue(m_jobQueue);
	hkpConstraintStabilizationUtil::setConstraintsSolvingMethod(m_pWorld, hkpConstraintAtom::METHOD_STABILIZED);
	hkpAgentRegisterUtil::registerAllAgents(m_pWorld->getCollisionDispatcher());
	
	m_pWorld->unmarkForWrite();
	
	// Baseline for the interval computation of the first update.
	m_timeLastUpdate = *timeStamp;

	LogInfo("Havok simulated world created.");
}
Exemplo n.º 10
0
/* Program entry point: configures denormal handling for the main thread and
   then hands control to the ISPC entry function. Command line arguments are
   not used. */
int main(int argc, char* argv[])
{
  /* FTZ and DAZ flags in the MXCSR control and status register give best performance */
  _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
  _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);

  /* run the ISPC side of the program */
  ispcEntry();

  return 0;
}
Exemplo n.º 11
0
// Switches flush-to-zero (FTZ) and denormals-are-zero (DAZ) on or off
// together for the calling thread.
//
// Returns true when the flags were applied (build with USE_SSE3), false when
// the build has no support and nothing was changed.
bool Context::setFlushDenormal(bool on) {
#ifdef USE_SSE3
  if (on) {
    // FTZ first, then DAZ
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
  } else {
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF);
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_OFF);
  }
  return true;
#else
  return false;
#endif
}
Exemplo n.º 12
0
/* Program entry point: creates an Embree device with verbose output enabled
   and deletes it again right away. Command line arguments are not used. */
int main(int argc, char* argv[])
{
  /* FTZ and DAZ flags in the MXCSR control and status register give best performance */
  _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
  _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);

  /* create a new Embree device */
  RTCDevice device = rtcNewDevice("verbose=1");

  /* delete the device again */
  rtcDeleteDevice(device);

  return 0;
}
Exemplo n.º 13
0
// Process-wide start-up: parses the gflags command line, initializes
// logging, enables FTZ/DAZ, seeds the random number generators, optionally
// starts the GPU environment and finally runs the registered init functions.
void initMain(int argc, char** argv) {
  installLayerStackTracer();

  // Keep a copy of the full command line for the log before gflags edits argv.
  std::string line;
  for (int i = 0; i < argc; ++i) {
    line.append(argv[i]);
    line.push_back(' ');
  }

#ifndef GFLAGS_GFLAGS_H_
  namespace gflags = google;
#endif

  gflags::ParseCommandLineFlags(&argc, &argv, true);
  initializeLogging(argc, argv);
  LOG(INFO) << "commandline: " << line;
  // After flag parsing only the program name may remain.
  CHECK_EQ(argc, 1) << "Unknown commandline argument: " << argv[1];

  installProfilerSwitch();

  // Denormal handling: FTZ needs SSE, DAZ needs SSE3.
#ifdef __SSE__
  _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
#endif
#ifdef __SSE3__
  _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
#endif

  // Seeding: an explicit non-zero seed is used as is; a seed of 0 means
  // "derive one from the current time" and that value is logged.
  if (FLAGS_seed != 0) {
    srand(FLAGS_seed);
    ThreadLocalRand::initSeed(FLAGS_seed);
  } else {
    unsigned int t = time(NULL);
    srand(t);
    ThreadLocalRand::initSeed(t);
    LOG(INFO) << "random number seed=" << t;
  }

  if (FLAGS_use_gpu) {
    // This is the initialization of the CUDA environment,
    // need before runInitFunctions.
    // TODO(hedaoyuan) Can be considered in the runInitFunctions,
    // but to ensure that it is the first to initialize.
    hl_start();
    hl_init(FLAGS_gpu_id);
  }

  version::printVersion();
  checkCPUFeature().check();
  runInitFunctions();
}
Exemplo n.º 14
0
void _initialize_cpu_thread	()
{
	debug_on_thread_spawn	();
#ifndef XRCORE_STATIC
	// fpu & sse 
	FPU::m24r	();
#endif  // XRCORE_STATIC
	if (CPU::ID.feature&_CPU_FEATURE_SSE)	{
		//_mm_setcsr ( _mm_getcsr() | (_MM_FLUSH_ZERO_ON+_MM_DENORMALS_ZERO_ON) );
		_MM_SET_FLUSH_ZERO_MODE			(_MM_FLUSH_ZERO_ON);
		if (_denormals_are_zero_supported)	{
			__try	{
				_MM_SET_DENORMALS_ZERO_MODE	(_MM_DENORMALS_ZERO_ON);
			} __except(EXCEPTION_EXECUTE_HANDLER) {
				_denormals_are_zero_supported	= FALSE;
			}
		}
	}
Exemplo n.º 15
0
// Enables flush-to-zero style handling of subnormal (denormalized) values
// for the calling thread. Does nothing on platforms other than x86 and
// PowerPC, and nothing on x86 when built with the Cray compiler (_CRAYC).
void set_ftz(){

#if defined(__i386__) || defined(__x86_64__)
// x86: set the FTZ flag, except when building with the Cray compiler.
#ifndef _CRAYC
  _MM_SET_FLUSH_ZERO_MODE (_MM_FLUSH_ZERO_ON);
#endif

#elif defined(__PPC__) || defined(__PPC64__)

//    Altivec non-IEEE mode for subnormal (denormalized) values.
//  m*vscr requires vector types even for writing to registers (disturbing)
//  so the high order bits are index'd.
//  NOTE(review): element 1 of the vector is assumed to hold the upper half
//  of the register word, so that setting its low bit corresponds to (1<<16);
//  confirm element ordering on the target.
  
  vector unsigned short vscr = vec_mfvscr();
  vscr[1] |= 1;   // (1<<16) in reg
  vec_mtvscr(vscr);

#endif
}
Exemplo n.º 16
0
// Worker loop that recomputes the summed phase and group delay curves
// (core.phasedelay / core.groupdelay) over ResponseGraph::NUM_RESP_W
// frequency points whenever core.paramsChangedDelay is raised.
void TimeLagFilterCore::DlyCalcThread::run(){
    // Turn off denormal handling for this thread (DAZ + FTZ).
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);

    while(!threadShouldExit()){
        // Nothing to do yet: block (presumably until notified; confirm wait(-1) semantics).
        if(!core.paramsChangedDelay){
            wait(-1);
        }
        if(threadShouldExit()){
            return;
        }

        // The parameters are only read; the read lock is held for the whole pass.
        const ScopedReadLock ctGuard(core.ctLock);

        // Evaluate at evenly spaced frequencies starting at 0.
        const float stepW = core.getMaxCtr() / ResponseGraph::NUM_RESP_W;
        float omega = 0.0f;
        for(int bin = 0; bin < ResponseGraph::NUM_RESP_W; ++bin){
            const float omegaSq = omega * omega;
            const float twoOmega = 2.0f * omega;
            float phaseTotal = 0.0f;
            float groupTotal = 0.0f;

            // Accumulate the contribution of every filter at this frequency.
            for(int k = 0; k < core.num_filters; ++k){
                const float ctr = core.ct_write[k].center;
                const float width = core.ct_write[k].bw;
                const float ctrSq = ctr * ctr;
                const float re = ctrSq - omegaSq;
                const float im = twoOmega * width * ctr;
                // Intermediates stay float so rounding matches step by step.
                const float phase = -2.0f * atan2(im, re);
                const float group = -4.0f * width * ctr * (ctrSq - (core.getMaxBW() * omegaSq)) / (re*re + im*im);
                phaseTotal += phase;
                groupTotal += group;
            }

            core.phasedelay[bin] = phaseTotal;
            core.groupdelay[bin] = groupTotal;
            omega += stepW;
        }
        core.paramsChangedDelay = false;
    }
}
Exemplo n.º 17
0
void MixerWorkerThread::run()
{
// set denormal protection for this thread
#ifdef __SSE3__
/* DAZ flag */
	_MM_SET_DENORMALS_ZERO_MODE( _MM_DENORMALS_ZERO_ON );
#endif
#ifdef __SSE__
/* FTZ flag */
	_MM_SET_FLUSH_ZERO_MODE( _MM_FLUSH_ZERO_ON );
#endif	
	QMutex m;
	while( m_quit == false )
	{
		m.lock();
		queueReadyWaitCond->wait( &m );
		globalJobQueue.run();
		m.unlock();
	}
}
Exemplo n.º 18
0
// Platform-specific start-up for the demo framework: enables flush-to-zero
// when SIMD intrinsics are in use, suggests processor 0 for the calling
// thread and initializes COM. The options argument is not used.
extern void HK_CALL DemoPlatformInit(hkDemoFrameworkOptions*)
{
#if defined(HK_COMPILER_HAS_INTRINSICS_IA32) && HK_CONFIG_SIMD == HK_CONFIG_SIMD_ENABLED
	// Operations on denormal/subnormal numbers (2^-1074 to 2^-1022) are typically
	// very slow, up to 100 times slower than on normal numbers, so flush them to zero.
	_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
#endif

	// Try to keep the calling thread on a single core. SetThreadAffinityMask
	// (XP/Vista/Server2003 etc) would be the more forceful alternative.
	SetThreadIdealProcessor(GetCurrentThread(), 0);

	// XAudio requires COM to be initialized.
	// If the call below does not compile, windows.h was included with a lot of
	// #defines that cut it down; a full windows.h include with winnt ver >= 4 is
	// required. In the demos that happens in the PCH, demos.h, before any Havok
	// base windows includes.
	CoInitializeEx(0, COINIT_MULTITHREADED);
}
Exemplo n.º 19
0
/* Initializes the library once: the first call sets FLA_initialized and runs
   the sub-system initializers in order; calls made while FLA_initialized is
   TRUE return without doing anything. */
void FLA_Init()
{
  if ( FLA_initialized != TRUE )
  {
    /* Mark as initialized before running the individual initializers. */
    FLA_initialized = TRUE;

    FLA_Error_messages_init();
    FLA_Memory_leak_counter_init();
    FLA_Init_constants();
    FLA_Cntl_init();

    /* With SSE intrinsics in use, flush denormal results to zero. */
#if FLA_VECTOR_INTRINSIC_TYPE == FLA_SSE_INTRINSICS
    _MM_SET_FLUSH_ZERO_MODE( _MM_FLUSH_ZERO_ON );
#endif

    /* The SuperMatrix queue is only present in builds that enable it. */
#ifdef FLA_ENABLE_SUPERMATRIX
    FLASH_Queue_init();
#endif
  }
}
Exemplo n.º 20
0
void RayEngine::embreeInit() {

	cout << "Starting Embree..." << endl;

	// Init library
	Embree.device = rtcNewDevice(NULL);
	_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
	_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);

	// Generate texture
	glGenTextures(1, &Embree.texture);
	glBindTexture(GL_TEXTURE_2D, Embree.texture);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
	glBindTexture(GL_TEXTURE_2D, 0);

	// Init scenes
	userData = this;
	for (uint i = 0; i < scenes.size(); i++)
		scenes[i]->embreeInit(Embree.device);

}
Exemplo n.º 21
0
// Advances the simulation if enough time has passed since the last update.
// Does nothing while the world is suspended. The world lock is held for the
// whole update, and the allocator's releaseAll() runs on every unsuspended
// call whether or not a step was taken.
void CWorld::DoUpdate()
{
	// Flush-to-zero is set on whichever thread performs the update.
	_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
	if(m_suspended)
	{
		return;
	}

	//ScanHair();

	m_lock.lock();

	// Time source: the separate clock() based timer or the shared time stamp.
	auto now = m_useSeperatedClock ? clock()*0.001 : *timeStamp;
	auto elapsed = now - m_timeLastUpdate;

	// Only step once more than half a tick has elapsed.
	if(elapsed > TIME_TICK * 0.5)
	{
		m_timeLastUpdate = now;
		ScanCell();
		//if(m_savedDeltaTime > TIME_TICK_US*2) m_savedDeltaTime = TIME_TICK_US*2;
		StepWorld(elapsed);
	}

	hkSkyrimMemoryAllocator::releaseAll();
	m_lock.unlock();
}
Exemplo n.º 22
0
/* exported for Rembedded.h */
/* Configures the floating point environment.
 *
 * start != 0: set up (clear the FreeBSD exception mask, select the IEEE
 *             control word where __setfpucw is needed, and switch FTZ/DAZ
 *             off when built with the Intel compiler newer than 8.00 on x86).
 * start == 0: tear down (set every bit of the FreeBSD mask, restore the
 *             default control word). FTZ/DAZ are not touched in this case.
 */
void fpu_setup(Rboolean start)
{
    if (!start) {
#ifdef __FreeBSD__
    fpsetmask(~0);
#endif

#ifdef NEED___SETFPUCW
    __setfpucw(_FPU_DEFAULT);
#endif
    return;
    }

#ifdef __FreeBSD__
    fpsetmask(0);
#endif

#ifdef NEED___SETFPUCW
    __setfpucw(_FPU_IEEE);
#endif
#if (defined(__i386) || defined(__x86_64)) && defined(__INTEL_COMPILER) && __INTEL_COMPILER > 800
    /* denormals must not be flushed or treated as zero */
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF);
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_OFF);
#endif
}
Exemplo n.º 23
0
// Enables flush-to-zero (FTZ) mode for the calling thread. Only the FTZ flag
// is set; the denormals-are-zero flag is left as it is.
void sc_SetDenormalFlags()
{
	// FTZ on
	_MM_SET_FLUSH_ZERO_MODE( _MM_FLUSH_ZERO_ON );
}
Exemplo n.º 24
0
int dt_init(int argc, char *argv[], const gboolean init_gui, const gboolean load_data, lua_State *L)
{
  double start_wtime = dt_get_wtime();

#ifndef __WIN32__
  if(getuid() == 0 || geteuid() == 0)
    printf(
        "WARNING: either your user id or the effective user id are 0. are you running darktable as root?\n");
#endif

#if defined(__SSE__)
  // make everything go a lot faster.
  _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
#endif

  dt_set_signal_handlers();

#include "is_supported_platform.h"

  int sse2_supported = 0;

#ifdef HAVE_BUILTIN_CPU_SUPPORTS
  // NOTE: _may_i_use_cpu_feature() looks better, but only avaliable in ICC
  __builtin_cpu_init();
  sse2_supported = __builtin_cpu_supports("sse2");
#else
  sse2_supported = dt_detect_cpu_features() & CPU_FLAG_SSE2;
#endif
  if(!sse2_supported)
  {
    fprintf(stderr, "[dt_init] SSE2 instruction set is unavailable.\n");
    fprintf(stderr, "[dt_init] expect a LOT of functionality to be broken. you have been warned.\n");
  }

#ifdef M_MMAP_THRESHOLD
  mallopt(M_MMAP_THRESHOLD, 128 * 1024); /* use mmap() for large allocations */
#endif

  // make sure that stack/frame limits are good (musl)
  dt_set_rlimits();

  // we have to have our share dir in XDG_DATA_DIRS,
  // otherwise GTK+ won't find our logo for the about screen (and maybe other things)
  {
    const gchar *xdg_data_dirs = g_getenv("XDG_DATA_DIRS");
    gchar *new_xdg_data_dirs = NULL;
    gboolean set_env = TRUE;
    if(xdg_data_dirs != NULL && *xdg_data_dirs != '\0')
    {
      // check if DARKTABLE_SHAREDIR is already in there
      gboolean found = FALSE;
      gchar **tokens = g_strsplit(xdg_data_dirs, G_SEARCHPATH_SEPARATOR_S, 0);
      // xdg_data_dirs is neither NULL nor empty => tokens != NULL
      for(char **iter = tokens; *iter != NULL; iter++)
        if(!strcmp(DARKTABLE_SHAREDIR, *iter))
        {
          found = TRUE;
          break;
        }
      g_strfreev(tokens);
      if(found)
        set_env = FALSE;
      else
        new_xdg_data_dirs = g_strjoin(G_SEARCHPATH_SEPARATOR_S, DARKTABLE_SHAREDIR, xdg_data_dirs, NULL);
    }
    else
    {
#ifndef _WIN32
      // see http://standards.freedesktop.org/basedir-spec/latest/ar01s03.html for a reason to use those as a
      // default
      if(!g_strcmp0(DARKTABLE_SHAREDIR, "/usr/local/share")
         || !g_strcmp0(DARKTABLE_SHAREDIR, "/usr/local/share/")
         || !g_strcmp0(DARKTABLE_SHAREDIR, "/usr/share") || !g_strcmp0(DARKTABLE_SHAREDIR, "/usr/share/"))
        new_xdg_data_dirs = g_strdup("/usr/local/share/" G_SEARCHPATH_SEPARATOR_S "/usr/share/");
      else
        new_xdg_data_dirs = g_strdup_printf("%s" G_SEARCHPATH_SEPARATOR_S "/usr/local/share/" G_SEARCHPATH_SEPARATOR_S
                                            "/usr/share/", DARKTABLE_SHAREDIR);
#else
      set_env = FALSE;
#endif
    }

    if(set_env) g_setenv("XDG_DATA_DIRS", new_xdg_data_dirs, 1);
    g_free(new_xdg_data_dirs);
  }

  setlocale(LC_ALL, "");
  bindtextdomain(GETTEXT_PACKAGE, DARKTABLE_LOCALEDIR);
  bind_textdomain_codeset(GETTEXT_PACKAGE, "UTF-8");
  textdomain(GETTEXT_PACKAGE);

  // init all pointers to 0:
  memset(&darktable, 0, sizeof(darktable_t));

  darktable.start_wtime = start_wtime;

  darktable.progname = argv[0];

  // FIXME: move there into dt_database_t
  dt_pthread_mutex_init(&(darktable.db_insert), NULL);
  dt_pthread_mutex_init(&(darktable.plugin_threadsafe), NULL);
  dt_pthread_mutex_init(&(darktable.capabilities_threadsafe), NULL);
  darktable.control = (dt_control_t *)calloc(1, sizeof(dt_control_t));

  // database
  char *dbfilename_from_command = NULL;
  char *noiseprofiles_from_command = NULL;
  char *datadir_from_command = NULL;
  char *moduledir_from_command = NULL;
  char *tmpdir_from_command = NULL;
  char *configdir_from_command = NULL;
  char *cachedir_from_command = NULL;

#ifdef HAVE_OPENCL
  gboolean exclude_opencl = FALSE;
  gboolean print_statistics = strcmp(argv[0], "darktable-cltest");
#endif

#ifdef USE_LUA
  char *lua_command = NULL;
#endif

  darktable.num_openmp_threads = 1;
#ifdef _OPENMP
  darktable.num_openmp_threads = omp_get_num_procs();
#endif
  darktable.unmuted = 0;
  GSList *config_override = NULL;
  for(int k = 1; k < argc; k++)
  {
    if(argv[k][0] == '-')
    {
      if(!strcmp(argv[k], "--help"))
      {
        return usage(argv[0]);
      }
      if(!strcmp(argv[k], "-h"))
      {
        return usage(argv[0]);
      }
      else if(!strcmp(argv[k], "--version"))
      {
#ifdef USE_LUA
        const char *lua_api_version = strcmp(LUA_API_VERSION_SUFFIX, "") ?
                                      STR(LUA_API_VERSION_MAJOR) "."
                                      STR(LUA_API_VERSION_MINOR) "."
                                      STR(LUA_API_VERSION_PATCH) "-"
                                      LUA_API_VERSION_SUFFIX :
                                      STR(LUA_API_VERSION_MAJOR) "."
                                      STR(LUA_API_VERSION_MINOR) "."
                                      STR(LUA_API_VERSION_PATCH);
#endif
        printf("this is %s\ncopyright (c) 2009-%s johannes hanika\n" PACKAGE_BUGREPORT "\n\ncompile options:\n"
               "  bit depth is %s\n"
#ifdef _DEBUG
               "  debug build\n"
#else
               "  normal build\n"
#endif
#if defined(__SSE2__) && defined(__SSE__)
               "  SSE2 optimized codepath enabled\n"
#else
               "  SSE2 optimized codepath disabled\n"
#endif
#ifdef _OPENMP
               "  OpenMP support enabled\n"
#else
               "  OpenMP support disabled\n"
#endif

#ifdef HAVE_OPENCL
               "  OpenCL support enabled\n"
#else
               "  OpenCL support disabled\n"
#endif

#ifdef USE_LUA
               "  Lua support enabled, API version %s\n"
#else
               "  Lua support disabled\n"
#endif

#ifdef USE_COLORDGTK
               "  Colord support enabled\n"
#else
               "  Colord support disabled\n"
#endif

#ifdef HAVE_GPHOTO2
               "  gPhoto2 support enabled\n"
#else
               "  gPhoto2 support disabled\n"
#endif

#ifdef HAVE_GRAPHICSMAGICK
               "  GraphicsMagick support enabled\n"
#else
               "  GraphicsMagick support disabled\n"
#endif

#ifdef HAVE_OPENEXR
               "  OpenEXR support enabled\n"
#else
               "  OpenEXR support disabled\n"
#endif
               ,
               darktable_package_string,
               darktable_last_commit_year,
               (sizeof(void *) == 8 ? "64 bit" : sizeof(void *) == 4 ? "32 bit" : "unknown")
#if USE_LUA
                   ,
               lua_api_version
#endif
               );
        return 1;
      }
      else if(!strcmp(argv[k], "--library") && argc > k + 1)
      {
        dbfilename_from_command = argv[++k];
        argv[k-1] = NULL;
        argv[k] = NULL;
      }
      else if(!strcmp(argv[k], "--datadir") && argc > k + 1)
      {
        datadir_from_command = argv[++k];
        argv[k-1] = NULL;
        argv[k] = NULL;
      }
      else if(!strcmp(argv[k], "--moduledir") && argc > k + 1)
      {
        moduledir_from_command = argv[++k];
        argv[k-1] = NULL;
        argv[k] = NULL;
      }
      else if(!strcmp(argv[k], "--tmpdir") && argc > k + 1)
      {
        tmpdir_from_command = argv[++k];
        argv[k-1] = NULL;
        argv[k] = NULL;
      }
      else if(!strcmp(argv[k], "--configdir") && argc > k + 1)
      {
        configdir_from_command = argv[++k];
        argv[k-1] = NULL;
        argv[k] = NULL;
      }
      else if(!strcmp(argv[k], "--cachedir") && argc > k + 1)
      {
        cachedir_from_command = argv[++k];
        argv[k-1] = NULL;
        argv[k] = NULL;
      }
      else if(!strcmp(argv[k], "--localedir") && argc > k + 1)
      {
        bindtextdomain(GETTEXT_PACKAGE, argv[++k]);
        argv[k-1] = NULL;
        argv[k] = NULL;
      }
      else if(argv[k][1] == 'd' && argc > k + 1)
      {
        if(!strcmp(argv[k + 1], "all"))
          darktable.unmuted = 0xffffffff; // enable all debug information
        else if(!strcmp(argv[k + 1], "cache"))
          darktable.unmuted |= DT_DEBUG_CACHE; // enable debugging for lib/film/cache module
        else if(!strcmp(argv[k + 1], "control"))
          darktable.unmuted |= DT_DEBUG_CONTROL; // enable debugging for scheduler module
        else if(!strcmp(argv[k + 1], "dev"))
          darktable.unmuted |= DT_DEBUG_DEV; // develop module
        else if(!strcmp(argv[k + 1], "input"))
          darktable.unmuted |= DT_DEBUG_INPUT; // input devices
        else if(!strcmp(argv[k + 1], "camctl"))
          darktable.unmuted |= DT_DEBUG_CAMCTL; // camera control module
        else if(!strcmp(argv[k + 1], "perf"))
          darktable.unmuted |= DT_DEBUG_PERF; // performance measurements
        else if(!strcmp(argv[k + 1], "pwstorage"))
          darktable.unmuted |= DT_DEBUG_PWSTORAGE; // pwstorage module
        else if(!strcmp(argv[k + 1], "opencl"))
          darktable.unmuted |= DT_DEBUG_OPENCL; // gpu accel via opencl
        else if(!strcmp(argv[k + 1], "sql"))
          darktable.unmuted |= DT_DEBUG_SQL; // SQLite3 queries
        else if(!strcmp(argv[k + 1], "memory"))
          darktable.unmuted |= DT_DEBUG_MEMORY; // some stats on mem usage now and then.
        else if(!strcmp(argv[k + 1], "lighttable"))
          darktable.unmuted |= DT_DEBUG_LIGHTTABLE; // lighttable related stuff.
        else if(!strcmp(argv[k + 1], "nan"))
          darktable.unmuted |= DT_DEBUG_NAN; // check for NANs when processing the pipe.
        else if(!strcmp(argv[k + 1], "masks"))
          darktable.unmuted |= DT_DEBUG_MASKS; // masks related stuff.
        else if(!strcmp(argv[k + 1], "lua"))
          darktable.unmuted |= DT_DEBUG_LUA; // lua errors are reported on console
        else if(!strcmp(argv[k + 1], "print"))
          darktable.unmuted |= DT_DEBUG_PRINT; // print errors are reported on console
        else if(!strcmp(argv[k + 1], "camsupport"))
          darktable.unmuted |= DT_DEBUG_CAMERA_SUPPORT; // camera support warnings are reported on console
        else
          return usage(argv[0]);
        k++;
        argv[k-1] = NULL;
        argv[k] = NULL;
      }
      else if(argv[k][1] == 't' && argc > k + 1)
      {
        darktable.num_openmp_threads = CLAMP(atol(argv[k + 1]), 1, 100);
        printf("[dt_init] using %d threads for openmp parallel sections\n", darktable.num_openmp_threads);
        k++;
        argv[k-1] = NULL;
        argv[k] = NULL;
      }
      else if(!strcmp(argv[k], "--conf") && argc > k + 1)
      {
        gchar *keyval = g_strdup(argv[++k]), *c = keyval;
        argv[k-1] = NULL;
        argv[k] = NULL;
        gchar *end = keyval + strlen(keyval);
        while(*c != '=' && c < end) c++;
        if(*c == '=' && *(c + 1) != '\0')
        {
          *c++ = '\0';
          dt_conf_string_entry_t *entry = (dt_conf_string_entry_t *)g_malloc(sizeof(dt_conf_string_entry_t));
          entry->key = g_strdup(keyval);
          entry->value = g_strdup(c);
          config_override = g_slist_append(config_override, entry);
        }
        g_free(keyval);
      }
      else if(!strcmp(argv[k], "--noiseprofiles") && argc > k + 1)
      {
        noiseprofiles_from_command = argv[++k];
        argv[k-1] = NULL;
        argv[k] = NULL;
      }
      else if(!strcmp(argv[k], "--luacmd") && argc > k + 1)
      {
#ifdef USE_LUA
        lua_command = argv[++k];
#else
        ++k;
#endif
        argv[k-1] = NULL;
        argv[k] = NULL;
      }
      else if(!strcmp(argv[k], "--disable-opencl"))
      {
#ifdef HAVE_OPENCL
        exclude_opencl = TRUE;
#endif
        argv[k] = NULL;
      }
      else if(!strcmp(argv[k], "--"))
      {
        // "--" confuses the argument parser of glib/gtk. remove it.
        argv[k] = NULL;
        break;
      }
      else
        return usage(argv[0]); // fail on unrecognized options
    }
  }

  // remove the NULLs to not confuse gtk_init() later.
  for(int i = 1; i < argc; i++)
  {
    int k;
    for(k = i; k < argc; k++)
      if(argv[k] != NULL) break;

    if(k > i)
    {
      k -= i;
      for(int j = i + k; j < argc; j++)
      {
        argv[j-k] = argv[j];
        argv[j] = NULL;
      }
      argc -= k;
    }
  }

  if(darktable.unmuted & DT_DEBUG_MEMORY)
  {
    fprintf(stderr, "[memory] at startup\n");
    dt_print_mem_usage();
  }

  if(init_gui)
  {
    // I doubt that connecting to dbus for darktable-cli makes sense
    darktable.dbus = dt_dbus_init();

    // make sure that we have no stale global progress bar visible. thus it's run as early is possible
    dt_control_progress_init(darktable.control);
  }

#ifdef _OPENMP
  omp_set_num_threads(darktable.num_openmp_threads);
#endif
  dt_loc_init_datadir(datadir_from_command);
  dt_loc_init_plugindir(moduledir_from_command);
  if(dt_loc_init_tmp_dir(tmpdir_from_command))
  {
    fprintf(stderr, "error: invalid temporary directory: %s\n", darktable.tmpdir);
    return usage(argv[0]);
  }
  dt_loc_init_user_config_dir(configdir_from_command);
  dt_loc_init_user_cache_dir(cachedir_from_command);

#ifdef USE_LUA
  dt_lua_init_early(L);
#endif

  // thread-safe init:
  dt_exif_init();
  char datadir[PATH_MAX] = { 0 };
  dt_loc_get_user_config_dir(datadir, sizeof(datadir));
  char darktablerc[PATH_MAX] = { 0 };
  snprintf(darktablerc, sizeof(darktablerc), "%s/darktablerc", datadir);

  // initialize the config backend. this needs to be done first...
  darktable.conf = (dt_conf_t *)calloc(1, sizeof(dt_conf_t));
  dt_conf_init(darktable.conf, darktablerc, config_override);
  g_slist_free_full(config_override, g_free);

  // set the interface language
  const gchar *lang = dt_conf_get_string("ui_last/gui_language");
#if defined(_WIN32)
  // get the default locale if no language preference was specified in the config file
  if(lang == NULL || lang[0] == '\0')
  {
    const wchar_t *wcLocaleName = NULL;
    wcLocaleName = dtwin_get_locale();
    if(wcLocaleName != NULL)
    {
      gchar *langLocale;
      langLocale = g_utf16_to_utf8(wcLocaleName, -1, NULL, NULL, NULL);
      if(langLocale != NULL)
      {
        g_free((gchar *)lang);
        lang = g_strdup(langLocale);
      }
    }
  }
#endif // defined (_WIN32)

  if(lang != NULL && lang[0] != '\0')
  {
    g_setenv("LANGUAGE", lang, 1);
    if(setlocale(LC_ALL, lang) != NULL) gtk_disable_setlocale();
    setlocale(LC_MESSAGES, lang);
    g_setenv("LANG", lang, 1);
  }
  g_free((gchar *)lang);

  // we need this REALLY early so that error messages can be shown, however after gtk_disable_setlocale
  if(init_gui)
  {
#ifdef GDK_WINDOWING_WAYLAND
    // There are currently bad interactions with Wayland (drop-downs
    // are very narrow, scroll events lost). Until this is fixed, give
    // priority to the XWayland backend for Wayland users.
    gdk_set_allowed_backends("x11,*");
#endif
    gtk_init(&argc, &argv);
  }

  // detect cpu features and decide which codepaths to enable
  dt_codepaths_init();

  // get the list of color profiles
  darktable.color_profiles = dt_colorspaces_init();

  // initialize the database
  darktable.db = dt_database_init(dbfilename_from_command, load_data);
  if(darktable.db == NULL)
  {
    printf("ERROR : cannot open database\n");
    return 1;
  }
  else if(!dt_database_get_lock_acquired(darktable.db))
  {
    gboolean image_loaded_elsewhere = FALSE;
#ifndef MAC_INTEGRATION
    // send the images to the other instance via dbus
    fprintf(stderr, "trying to open the images in the running instance\n");

    GDBusConnection *connection = NULL;
    for(int i = 1; i < argc; i++)
    {
      // make the filename absolute ...
      if(argv[i] == NULL || *argv[i] == '\0') continue;
      gchar *filename = dt_util_normalize_path(argv[i]);
      if(filename == NULL) continue;
      if(!connection) connection = g_bus_get_sync(G_BUS_TYPE_SESSION, NULL, NULL);
      // ... and send it to the running instance of darktable
      image_loaded_elsewhere = g_dbus_connection_call_sync(connection, "org.darktable.service", "/darktable",
                                                           "org.darktable.service.Remote", "Open",
                                                           g_variant_new("(s)", filename), NULL,
                                                           G_DBUS_CALL_FLAGS_NONE, -1, NULL, NULL) != NULL;
      g_free(filename);
    }
    if(connection) g_object_unref(connection);
#endif

    if(!image_loaded_elsewhere) dt_database_show_error(darktable.db);

    return 1;
  }

  // Initialize the signal system
  darktable.signals = dt_control_signal_init();

  // Make sure that the database and xmp files are in sync
  // We need conf and db to be up and running for that which is the case here.
  // FIXME: is this also useful in non-gui mode?
  GList *changed_xmp_files = NULL;
  if(init_gui && dt_conf_get_bool("run_crawler_on_start"))
  {
    changed_xmp_files = dt_control_crawler_run();
  }

  if(init_gui)
  {
    dt_control_init(darktable.control);
  }
  else
  {
    if(dbfilename_from_command && !strcmp(dbfilename_from_command, ":memory:"))
      dt_gui_presets_init(); // init preset db schema.
    darktable.control->running = 0;
    darktable.control->accelerators = NULL;
    dt_pthread_mutex_init(&darktable.control->run_mutex, NULL);
  }

  // initialize collection query
  darktable.collection = dt_collection_new(NULL);

  /* initialize selection */
  darktable.selection = dt_selection_new();

  /* capabilities set to NULL */
  darktable.capabilities = NULL;

  // Initialize the password storage engine
  darktable.pwstorage = dt_pwstorage_new();

  darktable.guides = dt_guides_init();

#ifdef HAVE_GRAPHICSMAGICK
  /* GraphicsMagick init */
  InitializeMagick(darktable.progname);

  // *SIGH*
  dt_set_signal_handlers();
#endif

  darktable.opencl = (dt_opencl_t *)calloc(1, sizeof(dt_opencl_t));
#ifdef HAVE_OPENCL
  dt_opencl_init(darktable.opencl, exclude_opencl, print_statistics);
#endif

  darktable.points = (dt_points_t *)calloc(1, sizeof(dt_points_t));
  dt_points_init(darktable.points, dt_get_num_threads());

  darktable.noiseprofile_parser = dt_noiseprofile_init(noiseprofiles_from_command);

  // must come before mipmap_cache, because that one will need to access
  // image dimensions stored in here:
  darktable.image_cache = (dt_image_cache_t *)calloc(1, sizeof(dt_image_cache_t));
  dt_image_cache_init(darktable.image_cache);

  darktable.mipmap_cache = (dt_mipmap_cache_t *)calloc(1, sizeof(dt_mipmap_cache_t));
  dt_mipmap_cache_init(darktable.mipmap_cache);

  // The GUI must be initialized before the views, because the init()
  // functions of the views depend on darktable.control->accels_* to register
  // their keyboard accelerators

  if(init_gui)
  {
    darktable.gui = (dt_gui_gtk_t *)calloc(1, sizeof(dt_gui_gtk_t));
    if(dt_gui_gtk_init(darktable.gui)) return 1;
    dt_bauhaus_init();
  }
  else
    darktable.gui = NULL;

  darktable.view_manager = (dt_view_manager_t *)calloc(1, sizeof(dt_view_manager_t));
  dt_view_manager_init(darktable.view_manager);

  // check whether we were able to load darkroom view. if we failed, we'll crash everywhere later on.
  if(!darktable.develop) return 1;

  darktable.imageio = (dt_imageio_t *)calloc(1, sizeof(dt_imageio_t));
  dt_imageio_init(darktable.imageio);

  // load the darkroom mode plugins once:
  dt_iop_load_modules_so();

  if(init_gui)
  {
#ifdef HAVE_GPHOTO2
    // Initialize the camera control.
    // this is done late so that the gui can react to the signal sent but before switching to lighttable!
    darktable.camctl = dt_camctl_new();
#endif

    darktable.lib = (dt_lib_t *)calloc(1, sizeof(dt_lib_t));
    dt_lib_init(darktable.lib);

    dt_gui_gtk_load_config();

    // init the gui part of views
    dt_view_manager_gui_init(darktable.view_manager);
    // Loading the keybindings
    char keyfile[PATH_MAX] = { 0 };

    // First dump the default keymapping
    snprintf(keyfile, sizeof(keyfile), "%s/keyboardrc_default", datadir);
    gtk_accel_map_save(keyfile);

    // Removing extraneous semi-colons from the default keymap
    strip_semicolons_from_keymap(keyfile);

    // Then load any modified keys if available
    snprintf(keyfile, sizeof(keyfile), "%s/keyboardrc", datadir);
    if(g_file_test(keyfile, G_FILE_TEST_EXISTS))
      gtk_accel_map_load(keyfile);
    else
      gtk_accel_map_save(keyfile); // Save the default keymap if none is present

    // initialize undo struct
    darktable.undo = dt_undo_init();
  }

  if(darktable.unmuted & DT_DEBUG_MEMORY)
  {
    fprintf(stderr, "[memory] after successful startup\n");
    dt_print_mem_usage();
  }

  dt_image_local_copy_synch();

/* init lua last, since it's user made stuff it must be in the real environment */
#ifdef USE_LUA
  dt_lua_init(darktable.lua_state.state, lua_command);
#endif

  if(init_gui)
  {
    const char *mode = "lighttable";
    // april 1st: you have to earn using dt first! or know that you can switch views with keyboard shortcuts
    time_t now;
    time(&now);
    struct tm lt;
    localtime_r(&now, &lt);
    if(lt.tm_mon == 3 && lt.tm_mday == 1) mode = "knight";
    // we have to call dt_ctl_switch_mode_to() here already to not run into a lua deadlock.
    // having another call later is ok
    dt_ctl_switch_mode_to(mode);

#ifndef MAC_INTEGRATION
    // load image(s) specified on cmdline.
    // this has to happen after lua is initialized as image import can run lua code
    // If only one image is listed, attempt to load it in darkroom
    int last_id = 0;
    gboolean only_single_images = TRUE;
    int loaded_images = 0;

    for(int i = 1; i < argc; i++)
    {
      gboolean single_image = FALSE;
      if(argv[i] == NULL || *argv[i] == '\0') continue;
      int new_id = dt_load_from_string(argv[i], FALSE, &single_image);
      if(new_id > 0)
      {
        last_id = new_id;
        loaded_images++;
        if(!single_image) only_single_images = FALSE;
      }
    }

    if(loaded_images == 1 && only_single_images)
    {
      dt_control_set_mouse_over_id(last_id);
      dt_ctl_switch_mode_to("darkroom");
    }
#endif
  }

  // last but not least construct the popup that asks the user about images whose xmp files are newer than the
  // db entry
  if(init_gui && changed_xmp_files)
  {
    dt_control_crawler_show_image_list(changed_xmp_files);
  }

  dt_print(DT_DEBUG_CONTROL, "[init] startup took %f seconds\n", dt_get_wtime() - start_wtime);

  return 0;
}
Exemplo n.º 25
0
// Turns on both SSE denormal-suppression modes in the MXCSR control register:
// flush-to-zero (FTZ) and denormals-are-zero (DAZ). All other MXCSR bits are
// left as they were.
void sc_SetDenormalFlags()
{
	// DAZ is bit 6 of MXCSR; spelled as a raw mask, as no DAZ macro is used here.
	const unsigned int denormalsAreZeroBit = 0x40;

	unsigned int controlWord = _mm_getcsr();
	controlWord |= _MM_FLUSH_ZERO_ON;    // FTZ
	controlWord |= denormalsAreZeroBit;  // DAZ
	_mm_setcsr(controlWord);
}
// Entry point of a vHavokCpuJobThreadPool worker thread.
//
// v points to this worker's WorkerThreadData. The function performs per-thread
// setup, then blocks on data.m_semaphore. Each time the semaphore is released
// it processes all jobs of the shared job queue, signals
// sharedThreadData.m_workerThreadFinished and blocks again. The loop ends once
// data.m_killThread is set; the thread then tears its per-thread state down,
// signals m_workerThreadFinished one final time and returns 0.
static void* HK_CALL hkWorkerThreadFunc(void *v)
{
    vHavokCpuJobThreadPool::WorkerThreadData& data = *static_cast<vHavokCpuJobThreadPool::WorkerThreadData*>(v);
    vHavokCpuJobThreadPool::SharedThreadData& sharedThreadData = *data.m_sharedThreadData;

    // Record this worker's id in the hkThreadNumber thread-local.
    HK_THREAD_LOCAL_SET( hkThreadNumber, data.m_threadId);

#if defined(HK_COMPILER_HAS_INTRINSICS_IA32) && HK_CONFIG_SIMD == HK_CONFIG_SIMD_ENABLED
    // Flush all denormal/subnormal numbers (2^-1074 to 2^-1022) to zero.
    // Typically operations on denormals are very slow, up to 100 times slower than normal numbers.
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
#endif

    // Hint/assign the hardware thread this worker should run on (platform specific).
#ifdef HK_PLATFORM_XBOX360
    XSetThreadProcessor(GetCurrentThread(), data.m_hardwareThreadId );
#elif defined(HK_PLATFORM_WIN32) && !defined(_VISION_WINRT)
    SetThreadIdealProcessor(GetCurrentThread(), data.m_hardwareThreadId);
    // SetThreadAffinityMask could be used instead to be more forceful.
#endif

    // Per-thread memory router; it is handed back in the cleanup section below.
    hkMemoryRouter memoryRouter;
    hkMemorySystem::getInstance().threadInit( memoryRouter, "vHavokCpuJobThreadPool" );
    hkBaseSystem::initThread( &memoryRouter );
    //hkUint32 allLockVal = 0;
    //hkReferencedObject::initThread(&allLockVal);

    if (sharedThreadData.m_timerBufferAllocation > 0)
    {
        // Allocate a monitor stream for this thread - this  enables timers.
        hkMonitorStream::getInstance().resize(sharedThreadData.m_timerBufferAllocation);
    }
    // Remember the current extent of the monitor stream in the worker data.
    data.m_monitorStreamBegin = hkMonitorStream::getInstance().getStart();
    data.m_monitorStreamEnd = hkMonitorStream::getInstance().getEnd();

    hkCheckDeterminismUtil::initThread();


    // VISION specific: Call any per thread callback
    if (sharedThreadData.m_OnWorkerThreadCreatedPtr)
    {
        // The callbacks are triggered while holding m_CallbackProtect.
        sharedThreadData.m_CallbackProtect.enter();
        sharedThreadData.m_OnWorkerThreadCreatedPtr->TriggerCallbacks( /* data needed? */);
        sharedThreadData.m_CallbackProtect.leave();
    }
    // END  VISION specific

    // Wait for the main thread to release the worker thread
    data.m_semaphore.acquire();

    // The thread "main loop"
    while (data.m_killThread == false)
    {
        // Timer reset was requested: start the monitor stream over.
        if (data.m_clearTimers)
        {
            hkMonitorStream::getInstance().reset();
            data.m_monitorStreamEnd = hkMonitorStream::getInstance().getEnd();
            data.m_clearTimers = false;
        }

        // Worker threads always report themselves with isNotPrimary == false here.
        const bool isNotPrimary = false;
        hkCheckDeterminismUtil::workerThreadStartFrame(isNotPrimary);

        // Enable timers for critical sections just during the step call
        hkCriticalSection::setTimersEnabled();

        sharedThreadData.m_jobQueue->processAllJobs();

        // The step call is done: disable the critical section timers again
        hkCriticalSection::setTimersDisabled();

        // Note collected timer data
        hkMonitorStream& stream = hkMonitorStream::getInstance();
        data.m_monitorStreamEnd = stream.getEnd();

        hkCheckDeterminismUtil::workerThreadFinishFrame();

        if( sharedThreadData.m_gcThreadMemoryOnCompletion )
        {
            hkMemorySystem::getInstance().garbageCollectThread( memoryRouter );
        }

        // Release any thread (usually the main thread) which may be waiting for all worker threads to finish.
        sharedThreadData.m_workerThreadFinished.release();

        // Immediately wait until the main thread releases the thread again
        data.m_semaphore.acquire();
    }


    // Perform cleanup operations

    // VISION specific: Call any per thread callback
    if (sharedThreadData.m_OnWorkerThreadFinishedPtr)
    {
        sharedThreadData.m_CallbackProtect.enter();
        sharedThreadData.m_OnWorkerThreadFinishedPtr->TriggerCallbacks( /* data needed? */);
        sharedThreadData.m_CallbackProtect.leave();
    }
    // END  VISION specific

    // Tear down in the reverse order of the initialisation above.
    hkCheckDeterminismUtil::quitThread();

    hkBaseSystem::quitThread();
    hkMemorySystem::getInstance().threadQuit( memoryRouter );

    // Final signal after cleanup - presumably lets the code that shuts the pool
    // down wait for this worker to have finished (TODO confirm against the caller).
    sharedThreadData.m_workerThreadFinished.release();

    return 0;
}
Exemplo n.º 27
0
/*
 * Performance test of the single-precision Riccati solvers.
 *
 * For each state dimension in nn[] a mass-spring test problem is built in
 * double precision, packed and converted to single precision, and then
 *   - sricposv_mpc  (factorization + solution, "sv")  and
 *   - sricpotrs_mpc (solution reusing the factorization, "trs")
 * are each run nnrep[] times. The average time per call, the resulting Gflops
 * and the percentage of the theoretical peak (flops_max * GHz_max) are printed
 * as one table row per problem size.
 *
 * Every buffer allocated for a problem size is released before moving on to
 * the next size. Always returns 0.
 */
int main()
	{
	
	
	printf("\n");
	printf("\n");
	printf("\n");
	printf(" HPMPC -- Library for High-Performance implementation of solvers for MPC.\n");
	printf(" Copyright (C) 2014 by Technical University of Denmark. All rights reserved.\n");
	printf("\n");
	printf(" HPMPC is distributed in the hope that it will be useful,\n");
	printf(" but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
	printf(" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n");
	printf(" See the GNU Lesser General Public License for more details.\n");
	printf("\n");
	printf("\n");
	printf("\n");

	printf("Riccati solver performance test - single precision\n");
	printf("\n");

	// maximum frequency of the processor
	const float GHz_max = 2.9; //3.6; //2.9;
	printf("Frequency used to compute theoretical peak: %5.1f GHz (edit test_dricposv.c to modify this value).\n", GHz_max);
	printf("\n");

	// maximum flops per cycle, single precision
#if defined(TARGET_X64_AVX)
	const float flops_max = 16;
	printf("Testing solvers for AVX instruction set, 64 bit: theoretical peak %5.1f Gflops\n", flops_max*GHz_max);
#elif defined(TARGET_X64_SSE3) || defined(TARGET_AMD_SSE3)
	const float flops_max = 8;
	printf("Testing solvers for SSE3 instruction set, 64 bit: theoretical peak %5.1f Gflops\n", flops_max*GHz_max);
#elif defined(TARGET_CORTEXA9)
	const float flops_max = 4;
	printf("Testing solvers for ARMv7a NEON instruction set: theoretical peak %5.1f Gflops\n", flops_max*GHz_max);
#elif defined(TARGET_X86_ATOM)
	const float flops_max = 4;
	printf("Testing solvers for SSE3 instruction set, 32 bit, optimized for Intel Atom: theoretical peak %5.1f Gflops\n", flops_max*GHz_max);
#elif defined(TARGET_POWERPC_G2)
	const float flops_max = 2;
	printf("Testing solvers for POWERPC instruction set, 32 bit: theoretical peak %5.1f Gflops\n", flops_max*GHz_max);
#elif defined(TARGET_C99_4X4)
	const float flops_max = 2;
	printf("Testing reference solvers, 4x4 kernel: theoretical peak %5.1f Gflops\n", flops_max*GHz_max);
#elif defined(TARGET_C99_2X2)
	const float flops_max = 2;
	printf("Testing reference solvers, 2x2 kernel: theoretical peak %5.1f Gflops\n", flops_max*GHz_max);
#endif

	printf("\n");
	printf("Tested solvers:\n");
	printf("-sv : Riccati factorization and system solution (prediction step in IP methods)\n");
	printf("-trs: system solution after a previous call to Riccati factorization (correction step in IP methods)\n");
	printf("\n");
	printf("\n");

#if defined(TARGET_X64_AVX) || defined(TARGET_X64_SSE3) || defined(TARGET_X86_ATOM) || defined(TARGET_AMD_SSE3)
	printf("\nflush to zero on\n");
	_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); // flush to zero subnormals !!! works only with one thread !!!
#endif

	// to throw floating-point exception
/*#ifndef __APPLE__*/
/*    feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);*/
/*#endif*/

	int ii, jj;
	
	// panel heights of the packed matrix format, double and single precision
	const int bsd = D_MR; //d_get_mr();
	const int bss = S_MR; //s_get_mr();
	
	int info = 0;

	// problem sizes (number of states) and the matching number of timed repetitions
	int nn[] = {4, 6, 8, 10, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92, 96, 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152, 156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204, 208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 248, 252, 256, 260, 264, 268, 272, 276, 280, 284, 288, 292, 296, 300};
	int nnrep[] = {10000, 10000, 10000, 10000, 10000, 4000, 4000, 2000, 2000, 1000, 1000, 400, 400, 400, 200, 200, 200, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 40, 40, 40, 40, 40, 20, 20, 20, 20, 20, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10};

	int ll;
	for(ll=0; ll<77; ll++)
/*	for(ll=0; ll<1; ll++)*/

		{

		int nx = nn[ll];//NX;//16;//nn[ll]; // number of states (it has to be even for the mass-spring system test problem)
		int nu = 2;//NU;//5; // number of inputs (controllers) (it has to be at least 1 and at most nx/2 for the mass-spring system test problem)
		int N  = 10;//NN;//10; // horizon length
		int nrep = nnrep[ll];
/*		int nx = NX;//16;//nn[ll]; // number of states (it has to be even for the mass-spring system test problem)*/
/*		int nu = NU;//5; // number of inputs (controllers) (it has to be at least 1 and at most nx/2 for the mass-spring system test problem)*/
/*		int N  = NN;//10; // horizon length*/
/*		int nrep = NREP;*/

		int rep;
	
		int nz = nx+nu+1;
		int pnz = bss*((nz+bss-nu%bss+bss-1)/bss);
	
/************************************************
* dynamical system
************************************************/	

		double *A; d_zeros(&A, nx, nx); // states update matrix

		double *B; d_zeros(&B, nx, nu); // inputs matrix

		double *b; d_zeros(&b, nx, 1); // states offset
		double *x0; d_zeros(&x0, nx, 1); // initial state

		double Ts = 0.5; // sampling time
		mass_spring_system(Ts, nx, nu, N, A, B, b, x0);
	
		for(jj=0; jj<nx; jj++)
			b[jj] = 0.1;
	
		for(jj=0; jj<nx; jj++)
			x0[jj] = 0;
		x0[0] = 3.5;
		x0[1] = 3.5;
	
//	d_print_mat(nx, nx, A, nx);
//	d_print_mat(nx, nu, B, nx);
//	d_print_mat(nx, 1, b, nx);
//	d_print_mat(nx, 1, x0, nx);
	
	/* packed */
		double *BAb; d_zeros(&BAb, nx, nz);

		dmcopy(nx, nu, B, nx, BAb, nx);
		dmcopy(nx, nx, A, nx, BAb+nu*nx, nx);
		dmcopy(nx, 1 , b, nx, BAb+(nu+nx)*nx, nx);
	
//	d_print_mat(nx, nx+nu+1, BAb, nx);

	/* transposed */
		double *BAbt; d_zeros_align(&BAbt, pnz, pnz);
		for(ii=0; ii<nx; ii++)
			for(jj=0; jj<nz; jj++)
				{
				BAbt[jj+pnz*ii] = BAb[ii+nx*jj];
				}

//	d_print_mat(nz, nx+1, BAbt, pnz);
//	s_print_mat(nz, nx+1, sBAbt, pnz);
//	return 0;
	
	/* packed into contiguous memory */
		double *pBAbt; d_zeros_align(&pBAbt, pnz, pnz);
		d_cvt_mat2pmat(nz, nx, 0, bsd, BAbt, pnz, pBAbt, pnz);

		float *psBAbt; s_zeros_align(&psBAbt, pnz, pnz);
		s_cvt_d2s_pmat(nz, nx, bsd, pBAbt, pnz, bss, psBAbt, pnz);

//	d_print_pmat(nz, nx, bsd, pBAbt, pnz);
//	s_print_pmat(nz, nx, bss, spBAbt, pnz);

/************************************************
* cost function
************************************************/	

		double *Q; d_zeros_align(&Q, pnz, pnz);
		for(ii=0; ii<nu; ii++) Q[ii*(pnz+1)] = 2.0;
		for(; ii<pnz; ii++) Q[ii*(pnz+1)] = 1.0;
		for(ii=0; ii<nz; ii++) Q[nx+nu+ii*pnz] = 1.0;
		Q[(nx+nu)*(pnz+1)] = 1e6;

		/* packed into contiguous memory */
		float *pQ; s_zeros_align(&pQ, pnz, pnz);
		cvt_d2s_mat2pmat(nz, nz, 0, bss, Q, pnz, pQ, pnz);

	/* matrices series */
		float *(hpQ[N+1]);
		float *(hq[N+1]);
		float *(hux[N+1]);
		float *(hpi[N+1]);
		float *(hpBAbt[N]);
		float *(hrb[N]);
		float *(hrq[N+1]);
		for(jj=0; jj<N; jj++)
			{
			s_zeros_align(&hpQ[jj], pnz, pnz);
			s_zeros_align(&hq[jj], pnz, 1);
			s_zeros_align(&hux[jj], pnz, 1);
			s_zeros_align(&hpi[jj], nx, 1);
			hpBAbt[jj] = psBAbt; // every stage shares the same dynamics matrix
			s_zeros_align(&hrb[jj], nx, 1);
			s_zeros_align(&hrq[jj], nx+nu, 1);
			}
		s_zeros_align(&hpQ[N], pnz, pnz);
		s_zeros_align(&hq[N], pnz, 1);
		s_zeros_align(&hux[N], pnz, 1);
		s_zeros_align(&hpi[N], nx, 1);
		s_zeros_align(&hrq[N], nx+nu, 1);
	
		// starting guess
		for(jj=0; jj<nx; jj++) hux[0][nu+jj] = (float) x0[jj];
	
		float *pL; s_zeros_align(&pL, pnz, pnz);
	
		float *pBAbtL; s_zeros_align(&pBAbtL, pnz, pnz);

/************************************************
* riccati-like iteration
************************************************/

		// predictor

		// restore cost function 
		for(ii=0; ii<N; ii++)
			{
			for(jj=0; jj<pnz*pnz; jj++) hpQ[ii][jj]=pQ[jj];
			}
		for(jj=0; jj<pnz*pnz; jj++) hpQ[N][jj]=pQ[jj];

		// call the solver
		sricposv_mpc(nx, nu, N, pnz, hpBAbt, hpQ, hux, pL, pBAbtL, COMPUTE_MULT, hpi, &info);

		if(PRINTRES==1)
			{
			/* print result */
			printf("\n\nsv\n\n");
			for(ii=0; ii<N; ii++)
				s_print_mat(1, nu, hux[ii], 1);
			}
		if(PRINTRES==1 && COMPUTE_MULT==1)
			{
			// print result 
			printf("\n\nsv\n\n");
			for(ii=0; ii<N; ii++)
				s_print_mat(1, nx, hpi[ii+1], 1);
			}

		// corrector
	
		// clear solution 
		for(ii=0; ii<N; ii++)
			{
			for(jj=0; jj<nu; jj++) hux[ii][jj] = 0;
			for(jj=0; jj<nx; jj++) hux[ii+1][nu+jj] = 0;
			}

		// restore linear part of cost function 
		for(ii=0; ii<N; ii++)
			{
			for(jj=0; jj<nx+nu; jj++) hq[ii][jj] = Q[nx+nu+pnz*jj];
			}
		for(jj=0; jj<nx+nu; jj++) hq[N][jj] = Q[nx+nu+pnz*jj];

		// call the solver 
		sricpotrs_mpc(nx, nu, N, pnz, hpBAbt, hpQ, hq, hux, pBAbtL, COMPUTE_MULT, hpi);

		if(PRINTRES==1)
			{
			// print result 
			printf("\n\ntrs\n\n");
			for(ii=0; ii<N; ii++)
				s_print_mat(1, nu, hux[ii], 1);
			}
		if(PRINTRES==1 && COMPUTE_MULT==1)
			{
			// print result 
			printf("\n\ntrs\n\n");
			for(ii=0; ii<N; ii++)
				s_print_mat(1, nx, hpi[ii+1], 1);
			}

		// restore cost function 
		for(ii=0; ii<N; ii++)
			{
			for(jj=0; jj<pnz*pnz; jj++) hpQ[ii][jj]=pQ[jj];
			}
		for(jj=0; jj<pnz*pnz; jj++) hpQ[N][jj]=pQ[jj];

		// restore linear part of cost function 
		for(ii=0; ii<N; ii++)
			{
			for(jj=0; jj<nx+nu; jj++) hq[ii][jj] = Q[nx+nu+pnz*jj];
			}
		for(jj=0; jj<nx+nu; jj++) hq[N][jj] = Q[nx+nu+pnz*jj];

		// residuals computation
		sres(nx, nu, N, pnz, hpBAbt, hpQ, hq, hux, hpi, hrq, hrb);

		if(PRINTRES==1 && COMPUTE_MULT==1)
			{
			// print result 
			printf("\n\nres\n\n");
			for(ii=0; ii<+N; ii++)
				s_print_mat(1, nx+nu, hrq[ii], 1);
			for(ii=0; ii<N; ii++)
				s_print_mat(1, nx, hrb[ii], 1);
			}



		// timing 
		struct timeval tv0, tv1, tv2;

		gettimeofday(&tv0, NULL); // start of the sv timing

		// single precision factorization + solution, nrep times
		for(rep=0; rep<nrep; rep++)
			{
			// restore cost function 
			for(ii=0; ii<N; ii++)
				{
				for(jj=0; jj<pnz*pnz; jj++) hpQ[ii][jj]=pQ[jj];
				}
			for(jj=0; jj<pnz*pnz; jj++) hpQ[N][jj]=pQ[jj];

			// call the solver 
			sricposv_mpc(nx, nu, N, pnz, hpBAbt, hpQ, hux, pL, pBAbtL, COMPUTE_MULT, hpi, &info);
			}
			
		gettimeofday(&tv1, NULL); // end of the sv timing, start of the trs timing

		for(rep=0; rep<nrep; rep++)
			{
			// clear solution 
			for(ii=0; ii<N; ii++)
				{
				for(jj=0; jj<nu; jj++) hux[ii][jj] = 0;
				for(jj=0; jj<nx; jj++) hux[ii+1][nu+jj] = 0;
				}

			// restore linear part of cost function 
			for(ii=0; ii<N; ii++)
				{
				for(jj=0; jj<nx+nu; jj++) hq[ii][jj] = Q[nx+nu+pnz*jj];
				}
			for(jj=0; jj<nx+nu; jj++) hq[N][jj] = Q[nx+nu+pnz*jj];

			// call the solver 
			sricpotrs_mpc(nx, nu, N, pnz, hpBAbt, hpQ, hq, hux, pBAbtL, COMPUTE_MULT, hpi);
			}
		
		gettimeofday(&tv2, NULL); // end of the trs timing



		// average time per call and the flop count of one call
		float time_sv = (float) (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6);
		float flop_sv = (1.0/3.0*nx*nx*nx+3.0/2.0*nx*nx) + N*(7.0/3.0*nx*nx*nx+4.0*nx*nx*nu+2.0*nx*nu*nu+1.0/3.0*nu*nu*nu+13.0/2.0*nx*nx+9.0*nx*nu+5.0/2.0*nu*nu);
		if(COMPUTE_MULT==1)
			flop_sv += N*2*nx*nx;
		float Gflops_sv = 1e-9*flop_sv/time_sv;
	
		float time_trs = (float) (tv2.tv_sec-tv1.tv_sec)/(nrep+0.0)+(tv2.tv_usec-tv1.tv_usec)/(nrep*1e6);
		float flop_trs = N*(8.0*nx*nx+8.0*nx*nu+2.0*nu*nu);
		if(COMPUTE_MULT==1)
			flop_trs += N*2*nx*nx;
		float Gflops_trs = 1e-9*flop_trs/time_trs;
		
		float Gflops_max = flops_max * GHz_max;

		// table header once; a literal percent sign in a printf format is "%%"
		if(ll==0)
			printf("\nnx\tnu\tN\tsv time\t\tsv Gflops\tsv %%\t\ttrs time\ttrs Gflops\ttrs %%\n\n");
		printf("%d\t%d\t%d\t%e\t%f\t%f\t%e\t%f\t%f\n", nx, nu, N, time_sv, Gflops_sv, 100.0*Gflops_sv/Gflops_max, time_trs, Gflops_trs, 100.0*Gflops_trs/Gflops_max);

/************************************************
* return
************************************************/

		free(A);
		free(B);
		free(b);
		free(x0);
		free(BAb);
		free(BAbt);
		free(pBAbt);
		free(psBAbt); // shared by all hpBAbt[] entries, so released exactly once
		free(Q);
		free(pQ);
		free(pL);
		free(pBAbtL);
		for(jj=0; jj<N; jj++)
			{
			free(hpQ[jj]);
			free(hq[jj]);
			free(hux[jj]);
			free(hpi[jj]);
			free(hrb[jj]);
			free(hrq[jj]);
			}
		free(hpQ[N]);
		free(hq[N]);
		free(hux[N]);
		free(hpi[N]);
		free(hrq[N]);
	


		} // increase size

	printf("\n");
	printf("\n");
	printf("\n");

	return 0;

	}
Exemplo n.º 28
0
int dt_init(int argc, char *argv[], const int init_gui,lua_State *L)
{
#ifndef __WIN32__
  if(getuid() == 0 || geteuid() == 0)
    printf("WARNING: either your user id or the effective user id are 0. are you running darktable as root?\n");
#endif

  // make everything go a lot faster.
  _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
#if !defined __APPLE__ && !defined __WIN32__
  _dt_sigsegv_old_handler = signal(SIGSEGV,&_dt_sigsegv_handler);
#endif

#ifndef __GNUC_PREREQ
  // on OSX, gcc-4.6 and clang chokes if this is not here.
  #if defined __GNUC__ && defined __GNUC_MINOR__
  # define __GNUC_PREREQ(maj, min) \
  ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
  #else
  # define __GNUC_PREREQ(maj, min) 0
  #endif
#endif
#ifndef __has_builtin
// http://clang.llvm.org/docs/LanguageExtensions.html#feature-checking-macros
  #define __has_builtin(x) false
#endif

#ifndef __SSE3__
  #error "Unfortunately we depend on SSE3 instructions at this time."
  #error "Please contribute a backport patch (or buy a newer processor)."
#else
  #if (__GNUC_PREREQ(4,8) || __has_builtin(__builtin_cpu_supports))
  //FIXME: check will work only in GCC 4.8+ !!! implement manual cpuid check !!!
  //NOTE: _may_i_use_cpu_feature() looks better, but is only available in ICC
  if (!__builtin_cpu_supports("sse3"))
  {
    fprintf(stderr, "[dt_init] unfortunately we depend on SSE3 instructions at this time.\n");
    fprintf(stderr, "[dt_init] please contribute a backport patch (or buy a newer processor).\n");
    return 1;
  }
  #else
  //FIXME: no way to check for SSE3 in runtime, implement manual cpuid check !!!
  #endif
#endif

#ifdef M_MMAP_THRESHOLD
  mallopt(M_MMAP_THRESHOLD,128*1024) ; /* use mmap() for large allocations */
#endif

  // we have to have our share dir in XDG_DATA_DIRS,
  // otherwise GTK+ won't find our logo for the about screen (and maybe other things)
  {
    const gchar *xdg_data_dirs = g_getenv("XDG_DATA_DIRS");
    gchar *new_xdg_data_dirs = NULL;
    gboolean set_env = TRUE;
    if(xdg_data_dirs != NULL && *xdg_data_dirs != '\0')
    {
      // check if DARKTABLE_SHAREDIR is already in there
      gboolean found = FALSE;
      gchar **tokens = g_strsplit(xdg_data_dirs, ":", 0);
      // xdg_data_dirs is neither NULL nor empty => tokens != NULL
      for(char **iter = tokens; *iter != NULL; iter++)
        if(!strcmp(DARKTABLE_SHAREDIR, *iter))
        {
          found = TRUE;
          break;
        }
      g_strfreev(tokens);
      if(found)
        set_env = FALSE;
      else
        new_xdg_data_dirs = g_strjoin(":", DARKTABLE_SHAREDIR, xdg_data_dirs, NULL);
    }
    else
    {
      // see http://standards.freedesktop.org/basedir-spec/latest/ar01s03.html for a reason to use those as a default
      if(!g_strcmp0(DARKTABLE_SHAREDIR, "/usr/local/share") || !g_strcmp0(DARKTABLE_SHAREDIR, "/usr/local/share/") ||
         !g_strcmp0(DARKTABLE_SHAREDIR, "/usr/share") || !g_strcmp0(DARKTABLE_SHAREDIR, "/usr/share/"))
        new_xdg_data_dirs = g_strdup("/usr/local/share/:/usr/share/");
      else
        new_xdg_data_dirs = g_strdup_printf("%s:/usr/local/share/:/usr/share/", DARKTABLE_SHAREDIR);
    }

    if(set_env)
      g_setenv("XDG_DATA_DIRS", new_xdg_data_dirs, 1);
    g_free(new_xdg_data_dirs);
  }

  setlocale(LC_ALL, "");
  bindtextdomain (GETTEXT_PACKAGE, DARKTABLE_LOCALEDIR);
  bind_textdomain_codeset (GETTEXT_PACKAGE, "UTF-8");
  textdomain (GETTEXT_PACKAGE);


  // init all pointers to 0:
  memset(&darktable, 0, sizeof(darktable_t));

  darktable.progname = argv[0];

  // database
  gchar *dbfilename_from_command = NULL;
  char *datadir_from_command = NULL;
  char *moduledir_from_command = NULL;
  char *tmpdir_from_command = NULL;
  char *configdir_from_command = NULL;
  char *cachedir_from_command = NULL;

#ifdef USE_LUA
  char *lua_command = NULL;
#endif

  darktable.num_openmp_threads = 1;
#ifdef _OPENMP
  darktable.num_openmp_threads = omp_get_num_procs();
#endif
  darktable.unmuted = 0;
  GSList *images_to_load = NULL, *config_override = NULL;
  for(int k=1; k<argc; k++)
  {
    if(argv[k][0] == '-')
    {
      if(!strcmp(argv[k], "--help"))
      {
        return usage(argv[0]);
      }
      if(!strcmp(argv[k], "-h"))
      {
        return usage(argv[0]);
      }
      else if(!strcmp(argv[k], "--version"))
      {
        printf("this is "PACKAGE_STRING"\ncopyright (c) 2009-2014 johannes hanika\n"PACKAGE_BUGREPORT"\n"
#ifdef _OPENMP
        "OpenMP support enabled\n"
#else
        "OpenMP support disabled\n"
#endif
        );
        return 1;
      }
      else if(!strcmp(argv[k], "--library"))
      {
        dbfilename_from_command = argv[++k];
      }
      else if(!strcmp(argv[k], "--datadir"))
      {
        datadir_from_command = argv[++k];
      }
      else if(!strcmp(argv[k], "--moduledir"))
      {
        moduledir_from_command = argv[++k];
      }
      else if(!strcmp(argv[k], "--tmpdir"))
      {
        tmpdir_from_command = argv[++k];
      }
      else if(!strcmp(argv[k], "--configdir"))
      {
        configdir_from_command = argv[++k];
      }
      else if(!strcmp(argv[k], "--cachedir"))
      {
        cachedir_from_command = argv[++k];
      }
      else if(!strcmp(argv[k], "--localedir"))
      {
        bindtextdomain (GETTEXT_PACKAGE, argv[++k]);
      }
      else if(argv[k][1] == 'd' && argc > k+1)
      {
        if(!strcmp(argv[k+1], "all"))             darktable.unmuted = 0xffffffff;   // enable all debug information
        else if(!strcmp(argv[k+1], "cache"))      darktable.unmuted |= DT_DEBUG_CACHE;   // enable debugging for lib/film/cache module
        else if(!strcmp(argv[k+1], "control"))    darktable.unmuted |= DT_DEBUG_CONTROL; // enable debugging for scheduler module
        else if(!strcmp(argv[k+1], "dev"))        darktable.unmuted |= DT_DEBUG_DEV; // develop module
        else if(!strcmp(argv[k+1], "fswatch"))    darktable.unmuted |= DT_DEBUG_FSWATCH; // fswatch module
        else if(!strcmp(argv[k+1], "input"))      darktable.unmuted |= DT_DEBUG_INPUT; // input devices
        else if(!strcmp(argv[k+1], "camctl"))     darktable.unmuted |= DT_DEBUG_CAMCTL; // camera control module
        else if(!strcmp(argv[k+1], "perf"))       darktable.unmuted |= DT_DEBUG_PERF; // performance measurements
        else if(!strcmp(argv[k+1], "pwstorage"))  darktable.unmuted |= DT_DEBUG_PWSTORAGE; // pwstorage module
        else if(!strcmp(argv[k+1], "opencl"))     darktable.unmuted |= DT_DEBUG_OPENCL;    // gpu accel via opencl
        else if(!strcmp(argv[k+1], "sql"))        darktable.unmuted |= DT_DEBUG_SQL; // SQLite3 queries
        else if(!strcmp(argv[k+1], "memory"))     darktable.unmuted |= DT_DEBUG_MEMORY; // some stats on mem usage now and then.
        else if(!strcmp(argv[k+1], "lighttable")) darktable.unmuted |= DT_DEBUG_LIGHTTABLE; // lighttable related stuff.
        else if(!strcmp(argv[k+1], "nan"))        darktable.unmuted |= DT_DEBUG_NAN; // check for NANs when processing the pipe.
        else if(!strcmp(argv[k+1], "masks"))      darktable.unmuted |= DT_DEBUG_MASKS; // masks related stuff.
        else if(!strcmp(argv[k+1], "lua"))        darktable.unmuted |= DT_DEBUG_LUA; // lua errors are reported on console
        else return usage(argv[0]);
        k ++;
      }
      else if(argv[k][1] == 't' && argc > k+1)
      {
        darktable.num_openmp_threads = CLAMP(atol(argv[k+1]), 1, 100);
        printf("[dt_init] using %d threads for openmp parallel sections\n", darktable.num_openmp_threads);
        k ++;
      }
      else if(!strcmp(argv[k], "--conf"))
      {
        gchar *keyval = g_strdup(argv[++k]), *c = keyval;
        gchar *end = keyval + strlen(keyval);
        while(*c != '=' && c < end) c++;
        if(*c == '=' && *(c+1) != '\0')
        {
          *c++ = '\0';
          dt_conf_string_entry_t *entry = (dt_conf_string_entry_t*)g_malloc(sizeof(dt_conf_string_entry_t));
          entry->key = g_strdup(keyval);
          entry->value = g_strdup(c);
          config_override = g_slist_append(config_override, entry);
        }
        g_free(keyval);
      }
      else if(!strcmp(argv[k], "--luacmd"))
      {
#ifdef USE_LUA
        lua_command = argv[++k];
#else
        ++k;
#endif
      }
    }
#ifndef MAC_INTEGRATION
    else
    {
      images_to_load = g_slist_append(images_to_load, argv[k]);
    }
#endif
  }

  if(darktable.unmuted & DT_DEBUG_MEMORY)
  {
    fprintf(stderr, "[memory] at startup\n");
    dt_print_mem_usage();
  }

#ifdef _OPENMP
  omp_set_num_threads(darktable.num_openmp_threads);
#endif
  dt_loc_init_datadir(datadir_from_command);
  dt_loc_init_plugindir(moduledir_from_command);
  if(dt_loc_init_tmp_dir(tmpdir_from_command))
  {
    printf(_("ERROR : invalid temporary directory : %s\n"),darktable.tmpdir);
    return usage(argv[0]);
  }
  dt_loc_init_user_config_dir(configdir_from_command);
  dt_loc_init_user_cache_dir(cachedir_from_command);

#if !GLIB_CHECK_VERSION(2, 35, 0)
  g_type_init();
#endif

  // does not work, as gtk is not inited yet.
  // even if it were, it's a super bad idea to invoke gtk stuff from
  // a signal handler.
  /* check cput caps */
  // dt_check_cpu(argc,argv);

#ifdef HAVE_GEGL
  char geglpath[PATH_MAX];
  char datadir[PATH_MAX];
  dt_loc_get_datadir(datadir, sizeof(datadir));
  snprintf(geglpath, sizeof(geglpath), "%s/gegl:/usr/lib/gegl-0.0", datadir);
  (void)setenv("GEGL_PATH", geglpath, 1);
  gegl_init(&argc, &argv);
#endif
#ifdef USE_LUA
  dt_lua_init_early(L);
#endif

  // thread-safe init:
  dt_exif_init();
  char datadir[PATH_MAX];
  dt_loc_get_user_config_dir (datadir, sizeof(datadir));
  char filename[PATH_MAX];
  snprintf(filename, sizeof(filename), "%s/darktablerc", datadir);

  // initialize the config backend. this needs to be done first...
  darktable.conf = (dt_conf_t *)calloc(1, sizeof(dt_conf_t));
  dt_conf_init(darktable.conf, filename, config_override);
  g_slist_free_full(config_override, g_free);

  // set the interface language
  const gchar* lang = dt_conf_get_string("ui_last/gui_language"); // we may not g_free 'lang' since it is owned by setlocale afterwards
  if(lang != NULL && lang[0] != '\0')
  {
    if(setlocale(LC_ALL, lang) != NULL)
      gtk_disable_setlocale();
  }

  // initialize the database
  darktable.db = dt_database_init(dbfilename_from_command);
  if(darktable.db == NULL)
  {
    printf("ERROR : cannot open database\n");
    return 1;
  }
  else if(!dt_database_get_lock_acquired(darktable.db))
  {
    // send the images to the other instance via dbus
    if(images_to_load)
    {
      GSList *p = images_to_load;

      // get a connection!
      GDBusConnection *connection = g_bus_get_sync(G_BUS_TYPE_SESSION,NULL, NULL);

      while (p != NULL)
      {
        // make the filename absolute ...
        gchar *filename = dt_make_path_absolute((gchar*)p->data);
        if(filename == NULL) continue;
        // ... and send it to the running instance of darktable
        g_dbus_connection_call_sync(connection,
                                    "org.darktable.service",
                                    "/darktable",
                                    "org.darktable.service.Remote",
                                    "Open",
                                    g_variant_new ("(s)", filename),
                                    NULL,
                                    G_DBUS_CALL_FLAGS_NONE,
                                    -1,
                                    NULL,
                                    NULL);
        p = g_slist_next(p);
        g_free(filename);
      }

      g_slist_free(images_to_load);
      g_object_unref(connection);
    }

    return 1;
  }

  // Initialize the signal system
  darktable.signals = dt_control_signal_init();

  // Make sure that the database and xmp files are in sync before starting the fswatch.
  // We need conf and db to be up and running for that which is the case here.
  // FIXME: is this also useful in non-gui mode?
  GList *changed_xmp_files = NULL;
  if(init_gui && dt_conf_get_bool("run_crawler_on_start"))
  {
    changed_xmp_files = dt_control_crawler_run();
  }

  // Initialize the filesystem watcher
  darktable.fswatch=dt_fswatch_new();

#ifdef HAVE_GPHOTO2
  // Initialize the camera control
  darktable.camctl=dt_camctl_new();
#endif

  // get max lighttable thumbnail size:
  darktable.thumbnail_width  = CLAMPS(dt_conf_get_int("plugins/lighttable/thumbnail_width"),  200, 3000);
  darktable.thumbnail_height = CLAMPS(dt_conf_get_int("plugins/lighttable/thumbnail_height"), 200, 3000);
  // and make sure it can be mip-mapped all the way from mip4 to mip0
  darktable.thumbnail_width  /= 16;
  darktable.thumbnail_width  *= 16;
  darktable.thumbnail_height /= 16;
  darktable.thumbnail_height *= 16;

  // Initialize the password storage engine
  darktable.pwstorage=dt_pwstorage_new();

  // FIXME: move there into dt_database_t
  dt_pthread_mutex_init(&(darktable.db_insert), NULL);
  dt_pthread_mutex_init(&(darktable.plugin_threadsafe), NULL);
  dt_pthread_mutex_init(&(darktable.capabilities_threadsafe), NULL);
  darktable.control = (dt_control_t *)calloc(1, sizeof(dt_control_t));
  if(init_gui)
  {
    dt_control_init(darktable.control);
  }
  else
  {
    if(dbfilename_from_command && !strcmp(dbfilename_from_command, ":memory:"))
      dt_gui_presets_init(); // init preset db schema.
    darktable.control->running = 0;
    darktable.control->accelerators = NULL;
    dt_pthread_mutex_init(&darktable.control->run_mutex, NULL);
  }

  // initialize collection query
  darktable.collection_listeners = NULL;
  darktable.collection = dt_collection_new(NULL);

  /* initialize selection */
  darktable.selection = dt_selection_new();

  /* capabilities set to NULL */
  darktable.capabilities = NULL;

#ifdef HAVE_GRAPHICSMAGICK
  /* GraphicsMagick init */
  InitializeMagick(darktable.progname);
#endif

  darktable.opencl = (dt_opencl_t *)calloc(1, sizeof(dt_opencl_t));
#ifdef HAVE_OPENCL
  dt_opencl_init(darktable.opencl, argc, argv);
#endif

  darktable.blendop = (dt_blendop_t *)calloc(1, sizeof(dt_blendop_t));
  dt_develop_blend_init(darktable.blendop);

  darktable.points = (dt_points_t *)calloc(1, sizeof(dt_points_t));
  dt_points_init(darktable.points, dt_get_num_threads());

  // must come before mipmap_cache, because that one will need to access
  // image dimensions stored in here:
  darktable.image_cache = (dt_image_cache_t *)calloc(1, sizeof(dt_image_cache_t));
  dt_image_cache_init(darktable.image_cache);

  darktable.mipmap_cache = (dt_mipmap_cache_t *)calloc(1, sizeof(dt_mipmap_cache_t));
  dt_mipmap_cache_init(darktable.mipmap_cache);

  // The GUI must be initialized before the views, because the init()
  // functions of the views depend on darktable.control->accels_* to register
  // their keyboard accelerators

  if(init_gui)
  {
    darktable.gui = (dt_gui_gtk_t *)calloc(1, sizeof(dt_gui_gtk_t));
    if(dt_gui_gtk_init(darktable.gui, argc, argv)) return 1;
    dt_bauhaus_init();
  }
  else darktable.gui = NULL;

  darktable.view_manager = (dt_view_manager_t *)calloc(1, sizeof(dt_view_manager_t));
  dt_view_manager_init(darktable.view_manager);

  darktable.imageio = (dt_imageio_t *)calloc(1, sizeof(dt_imageio_t));
  dt_imageio_init(darktable.imageio);

  // load the darkroom mode plugins once:
  dt_iop_load_modules_so();

  if(init_gui)
  {
    darktable.lib = (dt_lib_t *)calloc(1, sizeof(dt_lib_t));
    dt_lib_init(darktable.lib);

    dt_control_load_config(darktable.control);
  }

  if(init_gui)
  {
    // Loading the keybindings
    char keyfile[PATH_MAX];

    // First dump the default keymapping
    snprintf(keyfile, sizeof(keyfile), "%s/keyboardrc_default", datadir);
    gtk_accel_map_save(keyfile);

    // Removing extraneous semi-colons from the default keymap
    strip_semicolons_from_keymap(keyfile);

    // Then load any modified keys if available
    snprintf(keyfile, sizeof(keyfile), "%s/keyboardrc", datadir);
    if(g_file_test(keyfile, G_FILE_TEST_EXISTS))
      gtk_accel_map_load(keyfile);
    else
      gtk_accel_map_save(keyfile); // Save the default keymap if none is present

    // I doubt that connecting to dbus for darktable-cli makes sense
    darktable.dbus = dt_dbus_init();

    // initialize undo struct
    darktable.undo = dt_undo_init();

    // load image(s) specified on cmdline
    int id = 0;
    if(images_to_load)
    {
      // If only one image is listed, attempt to load it in darkroom
      gboolean load_in_dr = (g_slist_next(images_to_load) == NULL);
      GSList *p = images_to_load;

      while (p != NULL)
      {
        // don't put these function calls into MAX(), the macro will evaluate
        // it twice (and happily deadlock, in this particular case)
        int newid = dt_load_from_string((gchar*)p->data, load_in_dr);
        id = MAX(id, newid);
        p = g_slist_next(p);
      }

      if (!load_in_dr || id == 0)
        dt_ctl_switch_mode_to(DT_LIBRARY);

      g_slist_free(images_to_load);
    }
    else
      dt_ctl_switch_mode_to(DT_LIBRARY);
  }

  if(darktable.unmuted & DT_DEBUG_MEMORY)
  {
    fprintf(stderr, "[memory] after successful startup\n");
    dt_print_mem_usage();
  }

  dt_image_local_copy_synch();

  /* init lua last, since it's user made stuff it must be in the real environment */
#ifdef USE_LUA
  dt_lua_init(darktable.lua_state.state,lua_command);
#endif

  // last but not least construct the popup that asks the user about images whose xmp files are newer than the db entry
  if(init_gui && changed_xmp_files)
  {
    dt_control_crawler_show_image_list(changed_xmp_files);
  }

  return 0;
}
Exemplo n.º 29
0
  /* Constructs the (single) tutorial application object.
   *
   * Initializes all members to their defaults, records this object in
   * `instance`, enables the FTZ and DAZ modes, and registers the command
   * line options handled by this class together with their help texts.
   *
   * tutorialName: stored in the member of the same name.
   * features:     forwarded to Application; the "mode" option is only
   *               registered when the FEATURE_STREAM bit is set. */
  TutorialApplication::TutorialApplication (const std::string& tutorialName, int features)
    : Application(features),
      tutorialName(tutorialName),
      shader(SHADER_DEFAULT),
      width(512), height(512),
      pixels(nullptr),
      outputImageFilename(""),
      skipBenchmarkFrames(0), numBenchmarkFrames(0), numBenchmarkRepetitions(1),
      interactive(true), fullscreen(false),
      window_width(512), window_height(512),
      windowID(0),
      time0(getSeconds()),
      debug_int0(0), debug_int1(0),
      mouseMode(0),
      clickX(0), clickY(0),
      speed(1.0f),
      moveDelta(zero),
      command_line_camera(false),
      print_frame_rate(false),
      avg_render_time(64,1.0), avg_frame_time(64,1.0), avg_mrayps(64,1.0),
      print_camera(false),
      debug0(0), debug1(0), debug2(0), debug3(0),
      iflags_coherent(RTC_INTERSECT_COHERENT),
      iflags_incoherent(RTC_INTERSECT_INCOHERENT)
  {
    /* this class is a singleton: a second instance is a programming error */
    assert(instance == nullptr);
    instance = this;

    /* flush-to-zero and denormals-are-zero in the MXCSR register, for performance */
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);

    /* ---- option files and output ---- */
    registerOption("c", [this] (Ref<ParseStream> args, const FileName& baseDir) {
        /* the option file is resolved relative to the current parse path */
        FileName optionsFile = baseDir + args->getFileName();
        parseCommandLine(new ParseStream(new LineCommentFilter(optionsFile, "#")), optionsFile.path());
      }, "-c <filename>: parses command line option from <filename>");

    registerOption("o", [this] (Ref<ParseStream> args, const FileName&) {
        outputImageFilename = args->getFileName();
        /* writing an image implies a non-interactive run */
        interactive = false;
      }, "-o <filename>: output image filename");

    /* ---- camera ---- */
    registerOption("vp", [this] (Ref<ParseStream> args, const FileName&) {
        camera.from = args->getVec3fa();
        command_line_camera = true;
      }, "--vp <float> <float> <float>: camera position");

    registerOption("vi", [this] (Ref<ParseStream> args, const FileName&) {
        camera.to = args->getVec3fa();
        command_line_camera = true;
      }, "--vi <float> <float> <float>: camera lookat position");

    registerOption("vd", [this] (Ref<ParseStream> args, const FileName&) {
        /* a direction is turned into a look-at point relative to the position */
        camera.to = camera.from + args->getVec3fa();
        command_line_camera = true;
      }, "--vd <float> <float> <float>: camera direction vector");

    registerOption("vu", [this] (Ref<ParseStream> args, const FileName&) {
        camera.up = args->getVec3fa();
        command_line_camera = true;
      }, "--vu <float> <float> <float>: camera up vector");

    registerOption("fov", [this] (Ref<ParseStream> args, const FileName&) {
        camera.fov = args->getFloat();
        command_line_camera = true;
      }, "--fov <float>: vertical field of view");

    /* ---- framebuffer and window ---- */
    registerOption("size", [this] (Ref<ParseStream> args, const FileName&) {
        /* width is read first, then height */
        width = args->getInt();
        height = args->getInt();
      }, "--size <width> <height>: sets image size");

    registerOption("fullscreen", [this] (Ref<ParseStream>, const FileName&) {
        fullscreen = true;
      }, "--fullscreen: starts in fullscreen mode");

    /* ---- benchmarking ---- */
    registerOption("benchmark", [this] (Ref<ParseStream> args, const FileName&) {
        skipBenchmarkFrames = args->getInt();
        numBenchmarkFrames  = args->getInt();
        /* the repetition count is optional: only consume the next token
           when it exists and is not the start of another option */
        const bool hasRepetitions = args->peek() != "" && args->peek()[0] != '-';
        if (hasRepetitions) {
          numBenchmarkRepetitions = args->getInt();
        }
        interactive = false;
        rtcore += ",benchmark=1,start_threads=1";
      }, "--benchmark <N> <M> <R>: enabled benchmark mode, builds scene, skips N frames, renders M frames, and repeats this R times");

    registerOption("nodisplay", [this] (Ref<ParseStream>, const FileName&) {
        skipBenchmarkFrames = 0;
        numBenchmarkFrames  = 2048;
        interactive = false;
      }, "--nodisplay: enabled benchmark mode, continously renders frames");

    /* ---- per-frame console output ---- */
    registerOption("print-frame-rate", [this] (Ref<ParseStream>, const FileName&) {
        print_frame_rate = true;
      }, "--print-frame-rate: prints framerate for each frame on console");

    registerOption("print-camera", [this] (Ref<ParseStream>, const FileName&) {
        print_camera = true;
      }, "--print-camera: prints camera for each frame on console");

    /* ---- internal debugging values ---- */
    registerOption("debug0", [this] (Ref<ParseStream> args, const FileName&) {
        debug0 = args->getInt();
      }, "--debug0: sets internal debugging value");

    registerOption("debug1", [this] (Ref<ParseStream> args, const FileName&) {
        debug1 = args->getInt();
      }, "--debug1: sets internal debugging value");

    registerOption("debug2", [this] (Ref<ParseStream> args, const FileName&) {
        debug2 = args->getInt();
      }, "--debug2: sets internal debugging value");

    registerOption("debug3", [this] (Ref<ParseStream> args, const FileName&) {
        debug3 = args->getInt();
      }, "--debug3: sets internal debugging value");

    /* ---- shader selection ---- */
    registerOption("shader", [this] (Ref<ParseStream> args, const FileName&) {
        const std::string name = args->getString();
        if (name == "default" )       { shader = SHADER_DEFAULT;           return; }
        if (name == "eyelight")       { shader = SHADER_EYELIGHT;          return; }
        if (name == "occlusion")      { shader = SHADER_OCCLUSION;         return; }
        if (name == "uv")             { shader = SHADER_UV;                return; }
        if (name == "texcoords")      { shader = SHADER_TEXCOORDS;         return; }
        if (name == "texcoords-grid") { shader = SHADER_TEXCOORDS_GRID;    return; }
        if (name == "Ng")             { shader = SHADER_NG;                return; }
        if (name == "cycles") {
          /* the cycles shader takes one extra float argument */
          shader = SHADER_CYCLES;
          scale = args->getFloat();
          return;
        }
        if (name == "geomID")         { shader = SHADER_GEOMID;            return; }
        if (name == "primID")         { shader = SHADER_GEOMID_PRIMID;     return; }
        if (name == "ao")             { shader = SHADER_AMBIENT_OCCLUSION; return; }
        throw std::runtime_error("invalid shader:" +name);
      },
      "--shader <string>: sets shader to use at startup\n"
      "  default: default tutorial shader\n"
      "  eyelight: eyelight shading\n"
      "  occlusion: occlusion shading\n"
      "  uv: uv debug shader\n"
      "  texcoords: texture coordinate debug shader\n"
      "  texcoords-grid: grid texture debug shader\n"
      "  Ng: visualization of shading normal\n"
      "  cycles <float>: CPU cycle visualization\n"
      "  geomID: visualization of geometry ID\n"
      "  primID: visualization of geometry and primitive ID\n"
      "  ao: ambient occlusion shader");

    /* ---- rendering mode, only offered when the stream feature is requested ---- */
    if ((features & FEATURE_STREAM) != 0)
    {
      registerOption("mode", [] (Ref<ParseStream> args, const FileName&) {
          const std::string name = args->getString();
          if (name == "normal") { g_mode = MODE_NORMAL; return; }
          if (name == "stream") { g_mode = MODE_STREAM; return; }
          throw std::runtime_error("invalid mode:" +name);
        },
        "--mode: sets rendering mode\n"
        "  normal  : normal mode\n"
        "  stream  : stream mode\n");
    }

    /* ---- ray coherence hints: each option overrides BOTH hint slots ---- */
    registerOption("coherent", [this] (Ref<ParseStream>, const FileName&) {
        g_iflags_coherent   = iflags_coherent   = RTC_INTERSECT_COHERENT;
        g_iflags_incoherent = iflags_incoherent = RTC_INTERSECT_COHERENT;
      }, "--coherent: force using RTC_INTERSECT_COHERENT hint when tracing rays");

    registerOption("incoherent", [this] (Ref<ParseStream>, const FileName&) {
        g_iflags_coherent   = iflags_coherent   = RTC_INTERSECT_INCOHERENT;
        g_iflags_incoherent = iflags_incoherent = RTC_INTERSECT_INCOHERENT;
      }, "--incoherent: force using RTC_INTERSECT_INCOHERENT hint when tracing rays");
  }
Exemplo n.º 30
0
int main() {
    printf("\n");
    printf("\n");
    printf("\n");
    printf(
        " HPMPC -- Library for High-Performance implementation of solvers for "
        "MPC.\n");
    printf(
        " Copyright (C) 2014-2015 by Technical University of Denmark. All "
        "rights reserved.\n");
    printf("\n");
    printf(" HPMPC is distributed in the hope that it will be useful,\n");
    printf(" but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
    printf(" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n");
    printf(" See the GNU Lesser General Public License for more details.\n");
    printf("\n");
    printf("\n");
    printf("\n");

#if defined(TARGET_X64_INTEL_HASWELL) ||      \
    defined(TARGET_X64_INTEL_SABDY_BRIDGE) || \
    defined(TARGET_X64_INTEL_CORE) || defined(TARGET_X86_AMD_BULLDOZER)
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);  // flush to zero subnormals !!!
                                                 // works only with one thread
                                                 // !!!
#endif

    int ii, jj;

    int rep, nrep = NREP;

    int nx = 8;   // number of states (it has to be even for the mass-spring
                  // system test problem)
    int nu = 3;   // number of inputs (controllers) (it has to be at least 1 and
                  // at most nx/2 for the mass-spring system test problem)
    int N = 15;   // horizon length
    int nb = 11;  // number of box constrained inputs and states
    int ng = 0;   // 4;  // number of general constraints
    int ngN = 4;  // 4;  // number of general constraints at the last stage

    //    int N2 = 3;   // horizon length of partially condensed problem

    int nbu = nu < nb ? nu : nb;
    int nbx = nb - nu > 0 ? nb - nu : 0;

    // stage-wise variant size
    int nxx[N + 1];
#if defined(ELIMINATE_X0)
    nxx[0] = 0;
#else
    nxx[0] = nx;
#endif
    for (ii = 1; ii <= N; ii++) nxx[ii] = nx;

    int nuu[N + 1];
    for (ii = 0; ii < N; ii++) nuu[ii] = nu;
    nuu[N] = 0;

    int nbb[N + 1];
#if defined(ELIMINATE_X0)
    nbb[0] = nbu;
#else
    nbb[0] = nb;
#endif
    for (ii = 1; ii < N; ii++) nbb[ii] = nb;
    nbb[N] = nbx;

    int ngg[N + 1];
    for (ii = 0; ii < N; ii++) ngg[ii] = ng;
    ngg[N] = ngN;

    printf(
        " Test problem: mass-spring system with %d masses and %d controls.\n",
        nx / 2, nu);
    printf("\n");
    printf(
        " MPC problem size: %d states, %d inputs, %d horizon length, %d "
        "two-sided box constraints, %d two-sided general constraints.\n",
        nx, nu, N, nb, ng);
    printf("\n");
    printf(
        " IP method parameters: predictor-corrector IP, double precision, %d "
        "maximum iterations, %5.1e exit tolerance in duality measure.\n",
        MAXITER, TOL);
    printf("\n");
#if defined(TARGET_X64_AVX2)
    printf(" HPMPC built for the AVX2 architecture\n");
#endif
#if defined(TARGET_X64_AVX)
    printf(" HPMPC built for the AVX architecture\n");
#endif
    printf("\n");

    /************************************************
     * dynamical system
     ************************************************/

    // state space matrices & initial state
    double *A;
    d_zeros(&A, nx, nx);  // states update matrix
    double *B;
    d_zeros(&B, nx, nu);  // inputs matrix
    double *b;
    d_zeros(&b, nx, 1);  // states offset
    double *x0;
    d_zeros(&x0, nx, 1);  // initial state

    // mass-spring system
    double Ts = 0.5;  // sampling time
    mass_spring_system(Ts, nx, nu, A, B, b, x0);

    for (jj = 0; jj < nx; jj++) b[jj] = 0.1;

    for (jj = 0; jj < nx; jj++) x0[jj] = 0;
    x0[0] = 2.5;
    x0[1] = 2.5;

//    d_print_mat(nx, nx, A, nx);
//    d_print_mat(nx, nu, B, nx);
//    d_print_mat(nx, 1, b, nx);
//    d_print_mat(nx, 1, x0, nx);

#if defined(ELIMINATE_X0)
    // compute b0 = b + A*x0
    double *b0;
    d_zeros(&b0, nx, 1);
    dcopy_3l(nx, b, 1, b0, 1);
    dgemv_n_3l(nx, nx, A, nx, x0, b0);
    //    d_print_mat(nx, 1, b, nx);
    //    d_print_mat(nx, 1, b0, nx);

    // then A0 is a matrix of size 0x0
    double *A0;
    d_zeros(&A0, 0, 0);
#endif

    /************************************************
     * box constraints
     ************************************************/

    int jj_end;

    int *idxb0;
    int_zeros(&idxb0, nbb[0], 1);
    double *lb0;
    d_zeros(&lb0, nbb[0], 1);
    double *ub0;
    d_zeros(&ub0, nbb[0], 1);
#if defined(ELIMINATE_X0)
    for (jj = 0; jj < nbb[0]; jj++) {
        lb0[jj] = -0.5;  // umin
        ub0[jj] = +0.5;  // umin
        idxb0[jj] = jj;
    }
#else
    jj_end = nbx < nbb[0] ? nbx : nbb[0];
    for (jj = 0; jj < jj_end; jj++) {
//        lb0[jj] = x0[jj - nbu];  // initial state
//        ub0[jj] = x0[jj - nbu];  // initial state
        lb0[jj] = x0[jj];  // initial state
        ub0[jj] = x0[jj];  // initial state
        idxb0[jj] = jj;
    }
    for (; jj < nbb[0]; jj++) {
        lb0[jj] = -0.5;  // umin
        ub0[jj] = +0.5;  // umax
        idxb0[jj] = jj;
    }
#endif
    //    int_print_mat(nbb[0], 1, idxb0, nbb[0]);
    //    d_print_mat(nbb[0], 1, lb0, nbb[0]);

    int *idxb1;
    int_zeros(&idxb1, nbb[1], 1);
    double *lb1;
    d_zeros(&lb1, nbb[1], 1);
    double *ub1;
    d_zeros(&ub1, nbb[1], 1);
    jj_end = nbx < nbb[1] ? nbx : nbb[1];
    for (jj = 0; jj < jj_end; jj++) {
        lb1[jj] = -4.0;  // xmin
        ub1[jj] = +4.0;  // xmax
        idxb1[jj] = jj;
    }
    for (; jj < nbb[1]; jj++) {
        lb1[jj] = -0.5;  // umin
        ub1[jj] = +0.5;  // umax
        idxb1[jj] = jj;
    }
    //    int_print_mat(nbb[1], 1, idxb1, nbb[1]);
    //    d_print_mat(nbb[1], 1, lb1, nbb[1]);

    int *idxbN;
    int_zeros(&idxbN, nbb[N], 1);
    double *lbN;
    d_zeros(&lbN, nbb[N], 1);
    double *ubN;
    d_zeros(&ubN, nbb[N], 1);
    jj_end = nbx < nbb[N] ? nbx : nbb[N];
    for (jj = 0; jj < jj_end; jj++) {
        lbN[jj] = -4.0;  // xmin
        ubN[jj] = +4.0;  // xmax
        idxbN[jj] = jj;
    }
    for (; jj < nbb[N]; jj++) {
        lbN[jj] = -0.5;  // umin
        ubN[jj] = +0.5;  // umax
        idxbN[jj] = jj;
    }
    //    int_print_mat(nbb[N], 1, idxbN, nbb[N]);
    //    d_print_mat(nbb[N], 1, lbN, nbb[N]);

    /************************************************
     * general constraints
     ************************************************/

    double *C;
    d_zeros(&C, ng, nx);
    double *D;
    d_zeros(&D, ng, nu);
    double *lg;
    d_zeros(&lg, ng, 1);
    double *ug;
    d_zeros(&ug, ng, 1);

    double *CN;
    d_zeros(&CN, ngN, nx);
    for (ii = 0; ii < ngN; ii++) CN[ii * (ngN + 1)] = 1.0;
    //    d_print_mat(ngN, nx, CN, ngN);
    double *lgN;
    d_zeros(&lgN, ngN, 1);  // force all states to 0 at the last stage
    double *ugN;
    d_zeros(&ugN, ngN, 1);  // force all states to 0 at the last stage

    /************************************************
     * cost function
     ************************************************/

    double *Q;
    d_zeros(&Q, nx, nx);
    for (ii = 0; ii < nx; ii++) Q[ii * (nx + 1)] = 1.0;

    double *R;
    d_zeros(&R, nu, nu);
    for (ii = 0; ii < nu; ii++) R[ii * (nu + 1)] = 2.0;

    double *S;
    d_zeros(&S, nu, nx);

    double *q;
    d_zeros(&q, nx, 1);
    for (ii = 0; ii < nx; ii++) q[ii] = 0.1;

    double *r;
    d_zeros(&r, nu, 1);
    for (ii = 0; ii < nu; ii++) r[ii] = 0.2;

#if defined(ELIMINATE_X0)
    // Q0 and q0 are matrices of size 0
    double *Q0;
    d_zeros(&Q0, 0, 0);
    double *q0;
    d_zeros(&q0, 0, 1);

    // compute r0 = r + S*x0
    double *r0;
    d_zeros(&r0, nu, 1);
    dcopy_3l(nu, r, 1, r0, 1);
    dgemv_n_3l(nu, nx, S, nu, x0, r0);

    // then S0 is a matrix of size nux0
    double *S0;
    d_zeros(&S0, nu, 0);
#endif

    /************************************************
     * problems data
     ************************************************/

    double *hA[N];
    double *hB[N];
    double *hb[N];
    double *hQ[N + 1];
    double *hS[N];
    double *hR[N];
    double *hq[N + 1];
    double *hr[N];
    double *hlb[N + 1];
    double *hub[N + 1];
    int *hidxb[N + 1];
    double *hC[N + 1];
    double *hD[N];
    double *hlg[N + 1];
    double *hug[N + 1];

#if defined(ELIMINATE_X0)
    hA[0] = A0;
    hb[0] = b0;
    hQ[0] = Q0;
    hS[0] = S0;
    hq[0] = q0;
    hr[0] = r0;
#else
    hA[0] = A;
    hb[0] = b;
    hQ[0] = Q;
    hS[0] = S;
    hq[0] = q;
    hr[0] = r;
#endif
    hB[0] = B;
    hR[0] = R;
    hlb[0] = lb0;
    hub[0] = ub0;
    hidxb[0] = idxb0;
    hC[0] = C;
    hD[0] = D;
    hlg[0] = lg;
    hug[0] = ug;
    for (ii = 1; ii < N; ii++) {
        hA[ii] = A;
        hB[ii] = B;
        hb[ii] = b;
        hQ[ii] = Q;
        hS[ii] = S;
        hR[ii] = R;
        hq[ii] = q;
        hr[ii] = r;
        hlb[ii] = lb1;
        hub[ii] = ub1;
        hidxb[ii] = idxb1;
        hC[ii] = C;
        hD[ii] = D;
        hlg[ii] = lg;
        hug[ii] = ug;
    }
    hQ[N] = Q;  // or maybe initialize to the solution of the DARE???
    hq[N] = q;  // or maybe initialize to the solution of the DARE???
    hlb[N] = lbN;
    hub[N] = ubN;
    hidxb[N] = idxbN;
    hC[N] = CN;
    hlg[N] = lgN;
    hug[N] = ugN;

    /************************************************
     * solution
     ************************************************/

    double *hx[N + 1];
    double *hu[N];
    double *hpi[N];
    double *hlam[N + 1];
    double *ht[N + 1];

    for (ii = 0; ii < N; ii++) {
        d_zeros(&hx[ii], nxx[ii], 1);
        d_zeros(&hu[ii], nuu[ii], 1);
        d_zeros(&hpi[ii], nxx[ii + 1], 1);
        d_zeros(&hlam[ii], 2 * nbb[ii] + 2 * ngg[ii], 1);
        d_zeros(&ht[ii], 2 * nbb[ii] + 2 * ngg[ii], 1);
    }
    d_zeros(&hx[N], nxx[N], 1);
    d_zeros(&hlam[N], 2 * nbb[N] + 2 * ngg[N], 1);
    d_zeros(&ht[N], 2 * nbb[N] + 2 * ngg[N], 1);

    /************************************************
     * create the in and out struct
     ************************************************/

    ocp_qp_in qp_in;
    qp_in.N = N;
    qp_in.nx = (const int *)nxx;
    qp_in.nu = (const int *)nuu;
    qp_in.nb = (const int *)nbb;
    qp_in.nc = (const int *)ngg;
    qp_in.A = (const double **)hA;
    qp_in.B = (const double **)hB;
    qp_in.b = (const double **)hb;
    qp_in.Q = (const double **)hQ;
    qp_in.S = (const double **)hS;
    qp_in.R = (const double **)hR;
    qp_in.q = (const double **)hq;
    qp_in.r = (const double **)hr;
    qp_in.idxb = (const int **)hidxb;
    qp_in.lb = (const double **)hlb;
    qp_in.ub = (const double **)hub;
    qp_in.Cx = (const double **)hC;
    qp_in.Cu = (const double **)hD;
    qp_in.lc = (const double **)hlg;
    qp_in.uc = (const double **)hug;

    ocp_qp_out qp_out;
    qp_out.x = hx;
    qp_out.u = hu;
    qp_out.pi = hpi;
    qp_out.lam = hlam;
    qp_out.t = ht;  // XXX why also the slack variables ???

    /************************************************
     * solver arguments (fully sparse)
     ************************************************/

    // solver arguments
    ocp_qp_condensing_hpipm_args *hpipm_args = ocp_qp_condensing_hpipm_create_arguments(&qp_in);
//    hpipm_args->mu_max = TOL;
//    hpipm_args->iter_max = MAXITER;
//    hpipm_args->alpha_min = MINSTEP;
    hpipm_args->mu0 = 1.0;  // 0.0

    /************************************************
     * work space (fully sparse)
     ************************************************/

    int work_space_size =
        ocp_qp_condensing_hpipm_calculate_workspace_size(&qp_in, hpipm_args);
    printf("\nwork space size: %d bytes\n", work_space_size);
    void *workspace = malloc(work_space_size);

    //    void *mem;
    //    ocp_qp_hpipm_create_memory(&qp_in, hpipm_args, &mem);
    int memory_size =
        ocp_qp_condensing_hpipm_calculate_memory_size(&qp_in, hpipm_args);
    printf("\nmemory: %d bytes\n", memory_size);
    void *memory = malloc(memory_size);

    ocp_qp_condensing_hpipm_memory *hpipm_memory =
        ocp_qp_condensing_hpipm_create_memory(&qp_in, hpipm_args);

    /************************************************
     * call the solver (fully sparse)
     ************************************************/

    int return_value;

    acados_timer timer;
    acados_tic(&timer);

    //  nrep = 1;
    for (rep = 0; rep < nrep; rep++) {
        // call the QP OCP solver
        //        return_value = ocp_qp_hpipm(&qp_in, &qp_out, hpipm_args,
        //        workspace);
        return_value =
            ocp_qp_condensing_hpipm(&qp_in, &qp_out, hpipm_args, hpipm_memory, workspace);
    }

    real_t time = acados_toc(&timer)/nrep;

    if (return_value == ACADOS_SUCCESS)
        printf("\nACADOS status: solution found in %d iterations\n",
               hpipm_memory->iter);

    if (return_value == ACADOS_MAXITER)
        printf("\nACADOS status: maximum number of iterations reached\n");

    if (return_value == ACADOS_MINSTEP)
        printf("\nACADOS status: below minimum step size length\n");

    printf("\nu = \n");
    for (ii = 0; ii < N; ii++) d_print_mat(1, nuu[ii], hu[ii], 1);

    printf("\nx = \n");
    for (ii = 0; ii <= N; ii++) d_print_mat(1, nxx[ii], hx[ii], 1);

    printf("\npi = \n");
    for (ii = 0; ii < N; ii++) d_print_mat(1, nxx[ii+1], hpi[ii], 1);

    printf("\nlam = \n");
    for (ii = 0; ii <= N; ii++) d_print_mat(1, 2*nbb[ii]+2*ngg[ii], hlam[ii], 1);

    printf("\n");
    printf(" inf norm res: %e, %e, %e, %e, %e\n", hpipm_memory->inf_norm_res[0],
           hpipm_memory->inf_norm_res[1], hpipm_memory->inf_norm_res[2],
           hpipm_memory->inf_norm_res[3], hpipm_memory->inf_norm_res[4]);
    printf("\n");
    printf(
        " Solution time for %d IPM iterations, averaged over %d runs: %5.2e "
        "seconds\n", hpipm_memory->iter, nrep, time);
    printf("\n\n");

    /************************************************
     * free memory
     ************************************************/

    d_free(A);
    d_free(B);
    d_free(b);
    d_free(x0);
    d_free(Q);
    d_free(S);
    d_free(R);
    d_free(q);
    d_free(r);
#if defined(ELIMINATE_X0)
    d_free(A0);
    d_free(b0);
    d_free(Q0);
    d_free(S0);
    d_free(q0);
    d_free(r0);
#endif
    int_free(idxb0);
    d_free(lb0);
    d_free(ub0);
    int_free(idxb1);
    d_free(lb1);
    d_free(ub1);
    int_free(idxbN);
    d_free(lbN);
    d_free(ubN);
    d_free(C);
    d_free(D);
    d_free(lg);
    d_free(ug);
    d_free(CN);
    d_free(lgN);
    d_free(ugN);

    for (ii = 0; ii < N; ii++) {
        d_free(hx[ii]);
        d_free(hu[ii]);
        d_free(hpi[ii]);
        d_free(hlam[ii]);
        d_free(ht[ii]);
    }
    d_free(hx[N]);
    d_free(hlam[N]);
    d_free(ht[N]);

    free(workspace);
    free(memory);

    return 0;
}