void Mixer::fifoWriter::run() { // set denormal protection for this thread #ifdef __SSE3__ /* DAZ flag */ _MM_SET_DENORMALS_ZERO_MODE( _MM_DENORMALS_ZERO_ON ); #endif #ifdef __SSE__ /* FTZ flag */ _MM_SET_FLUSH_ZERO_MODE( _MM_FLUSH_ZERO_ON ); #endif #if 0 #ifdef LMMS_BUILD_LINUX #ifdef LMMS_HAVE_SCHED_H cpu_set_t mask; CPU_ZERO( &mask ); CPU_SET( 0, &mask ); sched_setaffinity( 0, sizeof( mask ), &mask ); #endif #endif #endif const fpp_t frames = m_mixer->framesPerPeriod(); while( m_writing ) { surroundSampleFrame * buffer = new surroundSampleFrame[frames]; const surroundSampleFrame * b = m_mixer->renderNextBuffer(); memcpy( buffer, b, frames * sizeof( surroundSampleFrame ) ); m_fifo->write( buffer ); } m_fifo->write( NULL ); }
/**
 * Enables flush-to-zero mode for denormal values on the calling thread.
 *
 * Only has an effect when OFX_SIMD_USE_SSE is defined; otherwise the function
 * body is empty.
 */
void ofx_activate_denormal_flush()
{
#ifdef OFX_SIMD_USE_SSE
    // mode to flush denormal values, needed for fast multiplication
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
#endif
}
void TimeLagFilterCore::DTCalcThread::run(){ //Disable denormalized floats _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); //Start float b, c; while(!threadShouldExit()){ if(!core.paramsChanged) wait(-1); if(threadShouldExit()) return; if(core.sampleSwapDT){ //Don't recalculate if waiting for sample to finish wait(1); //Try again soon }else{ //Copy write to calc { const ScopedWriteLock writeLock(core.ctLock); memcpy(core.ct_calc, core.ct_write, core.num_filters * sizeof (CTParams)); core.paramsChanged = false; } //Calculate filter coefficients const ScopedWriteLock writeLock(core.dtLock); for(int i=0; i<core.num_filters; ++i){ filtercalculations(core.reduced_fs, core.ct_calc[i].center, core.ct_calc[i].bw, &b, &c); core.dt_calc[i].b = b; core.dt_calc[i].c = c; } core.sampleSwapDT = true; } } }
/* main function in embree namespace */ int main(int argc, char** argv) { /* for best performance set FTZ and DAZ flags in MXCSR control and status register */ _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); /* set default camera */ g_camera.from = Vec3fa(2.5f,2.5f,2.5f); g_camera.to = Vec3fa(0.0f,0.0f,0.0f); /* create stream for parsing */ Ref<ParseStream> stream = new ParseStream(new CommandLineStream(argc, argv)); /* parse command line */ parseCommandLine(stream, FileName()); if (g_numThreads) g_rtcore += ",threads=" + toString(g_numThreads); /* initialize ray tracing core */ init(g_rtcore.c_str()); /* render to disk */ if (outFilename.str() != "") { renderToFile(outFilename); return 0; } /* initialize GLUT */ initWindowState(argc,argv,tutorialName, g_width, g_height, g_fullscreen); /* enter the GLUT run loop */ enterWindowRunLoop(); return 0; }
/* main function in embree namespace */ int main(int argc, char** argv) { /* for best performance set FTZ and DAZ flags in MXCSR control and status register */ _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); /* initialize ray tracing core and force bvh4.triangle4v hierarchy for triangles */ rtcInit("tri_accel=bvh4.triangle4v"); /* set error handler */ rtcSetErrorFunction(error_handler); /* create scene */ g_scene = rtcNewScene(RTC_SCENE_STATIC,RTC_INTERSECT1); addCube(g_scene,Vec3fa(-1,0,0)); addCube(g_scene,Vec3fa(1,0,0)); addCube(g_scene,Vec3fa(0,0,-1)); addCube(g_scene,Vec3fa(0,0,1)); addHair(g_scene); addGroundPlane(g_scene); rtcCommit (g_scene); /* print triangle BVH */ print_bvh(g_scene); /* cleanup */ rtcDeleteScene (g_scene); rtcExit(); return 0; }
/* main function in embree namespace */ int main(int argc, char** argv) { /* for best performance set FTZ and DAZ flags in MXCSR control and status register */ _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); /* create stream for parsing */ Ref<ParseStream> stream = new ParseStream(new CommandLineStream(argc, argv)); /* parse command line */ parseCommandLine(stream, FileName()); /* load default scene if none specified */ if (filename.ext() == "") { FileName file = FileName::executableFolder() + FileName("models/cornell_box.ecs"); parseCommandLine(new ParseStream(new LineCommentFilter(file, "#")), file.path()); } /* configure number of threads */ if (g_numThreads) g_rtcore += ",threads=" + std::to_string((long long)g_numThreads); if (g_numBenchmarkFrames) g_rtcore += ",benchmark=1"; g_rtcore += g_subdiv_mode; /* load scene */ if (strlwr(filename.ext()) == std::string("obj")) { g_scene->add(loadOBJ(filename,g_subdiv_mode != "")); } else if (strlwr(filename.ext()) == std::string("xml")) { g_scene->add(loadXML(filename,one)); } else if (filename.ext() != "") THROW_RUNTIME_ERROR("invalid scene type: "+strlwr(filename.ext())); /* initialize ray tracing core */ init(g_rtcore.c_str()); /* send model */ g_obj_scene.add(g_scene.dynamicCast<SceneGraph::Node>(),g_instancing_mode); g_scene = nullptr; set_scene(&g_obj_scene); /* benchmark mode */ if (g_numBenchmarkFrames) renderBenchmark(outFilename); /* render to disk */ if (outFilename.str() != "") renderToFile(outFilename); /* interactive mode */ if (g_interactive) { initWindowState(argc,argv,tutorialName, g_width, g_height, g_fullscreen); enterWindowRunLoop(g_anim_mode); } return 0; }
int main(int argc, char **argv) { /* for best performance set FTZ and DAZ flags in MXCSR control and status register */ _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); std::cout << " === Possible cmd line options: -pregenerate, -cache === " << std::endl; /* set default camera */ g_camera.from = Vec3fa(1.5f,1.5f,-1.5f); g_camera.to = Vec3fa(0.0f,0.0f,0.0f); /*! Parse command line options. */ parseCommandLine(new ParseStream(new CommandLineStream(argc, argv)), FileName()); /*! Set the thread count in the Embree configuration string. */ if (g_numThreads) g_rtcore += ",threads=" + std::to_string((long long)g_numThreads); g_rtcore += g_subdiv_mode; /*! Initialize Embree state. */ init(g_rtcore.c_str()); /* render to disk */ if (outFilename.str() != "") renderToFile(outFilename); /* interactive mode */ if (g_interactive) { initWindowState(argc,argv,tutorialName, g_width, g_height, g_fullscreen); enterWindowRunLoop(); } return 0; }
void nova_server::prepare_backend(void) { /* register audio backend ports */ const int blocksize = get_audio_blocksize(); const int input_channels = get_input_count(); const int output_channels = get_output_count(); std::vector<sample*> inputs, outputs; for (int channel = 0; channel != input_channels; ++channel) inputs.push_back(sc_factory->world.mAudioBus + (blocksize * (output_channels + channel))); audio_backend::input_mapping(inputs.begin(), inputs.end()); for (int channel = 0; channel != output_channels; ++channel) outputs.push_back(sc_factory->world.mAudioBus + blocksize * channel); audio_backend::output_mapping(outputs.begin(), outputs.end()); #ifdef __SSE__ /* denormal handling */ _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); _mm_setcsr(_mm_getcsr() | 0x40); #endif time_per_tick = time_tag::from_samples(blocksize, get_samplerate()); }
/**
 * Constructs the simulated world.
 *
 * Steps, in order: clear the pointer/flag members, enable flush-to-zero,
 * bring up the Havok memory system and base system, record the current
 * thread id in m_threadInit, create the job queue and CPU thread pool sized
 * from the reported hardware thread count, create a multithreaded hkpWorld
 * and, while it is marked for write, install the collision filter, register
 * the world with the job queue, select the stabilized constraint solving
 * method and register all collision agents. Finally the last-update time is
 * taken from *timeStamp.
 */
CWorld::CWorld(void)
{
	m_pWorld          = 0;
	m_pMemoryRouter   = 0;
	m_collisionFilter = 0;
	m_pCell           = 0;
	m_suspended       = 0;

	_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);

	// Havok memory + base system (frame info size: 128 MiB)
	m_pMemoryRouter = hkMemoryInitUtil::initDefault(
		hkMallocAllocator::m_defaultMallocAllocator,
		hkMemorySystem::FrameInfo( 128 * 1024 * 1024 ) );
	hkBaseSystem::init( m_pMemoryRouter, errorReport );
	m_threadInit.insert(GetCurrentThreadId());

	// Worker count: twice the number of hardware threads reported by Havok
	hkHardwareInfo hwInfo;
	hkGetHardwareInfo(hwInfo);
	const int workerCount = hwInfo.m_numThreads*2;
	LogInfo("System run with %d threads", workerCount);

	// The job queue is set up for one more CPU thread than the pool has workers
	hkJobQueueCinfo jobQueueInfo;
	jobQueueInfo.m_jobQueueHwSetup.m_numCpuThreads = workerCount+1;
	m_jobQueue = new hkJobQueue(jobQueueInfo);

	hkCpuJobThreadPoolCinfo jobPoolInfo;
	jobPoolInfo.m_numThreads = workerCount;
	m_jobThreadPool = new hkCpuJobThreadPool(jobPoolInfo);

	// World description
	hkpWorldCinfo worldInfo;
	worldInfo.m_gravity.set(0, 0, -9.8);
	//worldInfo.m_gravity.set(0, 0, 0);
	worldInfo.setBroadPhaseWorldSize(1e+6);
	worldInfo.m_broadPhaseType = hkpWorldCinfo::BROADPHASE_TYPE_SAP;
	worldInfo.m_broadPhaseBorderBehaviour = hkpWorldCinfo::BROADPHASE_BORDER_DO_NOTHING;
	worldInfo.setupSolverInfo(hkpWorldCinfo::SOLVER_TYPE_8ITERS_HARD);
	worldInfo.m_simulationType = hkpWorldCinfo::SIMULATION_TYPE_MULTITHREADED;
	worldInfo.m_enableDeactivation = false;

	m_pWorld = new hkpWorld(worldInfo);

	// Everything between markForWrite() and unmarkForWrite() configures the world
	m_pWorld->markForWrite();
	{
		auto* constraintFilter = new hkpConstraintCollisionFilter(new MyGroupFilter);
		m_collisionFilter = constraintFilter;
		m_pWorld->setCollisionFilter(constraintFilter);
		constraintFilter->init(m_pWorld);

		hkpWorld::registerWithJobQueue(m_jobQueue);
		hkpConstraintStabilizationUtil::setConstraintsSolvingMethod(m_pWorld, hkpConstraintAtom::METHOD_STABILIZED);
		hkpAgentRegisterUtil::registerAllAgents(m_pWorld->getCollisionDispatcher());
	}
	m_pWorld->unmarkForWrite();

	m_timeLastUpdate = *timeStamp;
	LogInfo("Havok simulated world created.");
}
/* Program entry point: enables the FTZ and DAZ flags, then hands control to
 * ispcEntry(). The command line arguments are not used. Always returns 0. */
int main(int argc, char* argv[])
{
  /* for best performance set FTZ and DAZ flags in MXCSR control and status register */
  _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
  _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);

  ispcEntry();

  return 0;
}
/**
 * Switches flush-to-zero (FTZ) and denormals-are-zero (DAZ) handling on or
 * off for the calling thread.
 *
 * @param on  true to enable both flags, false to disable both.
 * @return    true when built with USE_SSE3 (the flags were written),
 *            false otherwise (nothing was changed).
 */
bool Context::setFlushDenormal(bool on)
{
#ifndef USE_SSE3
  return false;
#else
  if (on) {
    // Setting flush-to-zero (FTZ) flag
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
    // Setting denormals-are-zero (DAZ) flag
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
  } else {
    // Clearing flush-to-zero (FTZ) flag
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF);
    // Clearing denormals-are-zero (DAZ) flag
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_OFF);
  }
  return true;
#endif
}
/* Program entry point: enables the FTZ and DAZ flags, creates an Embree
 * device configured with "verbose=1" and deletes it again straight away.
 * Always returns 0. */
int main(int argc, char* argv[])
{
  /* for best performance set FTZ and DAZ flags in MXCSR control and status register */
  _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
  _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);

  /* create new Embree device */
  RTCDevice device = rtcNewDevice("verbose=1");

  /* delete device again */
  rtcDeleteDevice(device);

  return 0;
}
void initMain(int argc, char** argv) { installLayerStackTracer(); std::string line; for (int i = 0; i < argc; ++i) { line += argv[i]; line += ' '; } #ifndef GFLAGS_GFLAGS_H_ namespace gflags = google; #endif gflags::ParseCommandLineFlags(&argc, &argv, true); initializeLogging(argc, argv); LOG(INFO) << "commandline: " << line; CHECK_EQ(argc, 1) << "Unknown commandline argument: " << argv[1]; installProfilerSwitch(); #ifdef __SSE__ _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); #endif #ifdef __SSE3__ _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); #endif if (FLAGS_seed == 0) { unsigned int t = time(NULL); srand(t); ThreadLocalRand::initSeed(t); LOG(INFO) << "random number seed=" << t; } else { srand(FLAGS_seed); ThreadLocalRand::initSeed(FLAGS_seed); } if (FLAGS_use_gpu) { // This is the initialization of the CUDA environment, // need before runInitFunctions. // TODO(hedaoyuan) Can be considered in the runInitFunctions, // but to ensure that it is the first to initialize. hl_start(); hl_init(FLAGS_gpu_id); } version::printVersion(); checkCPUFeature().check(); runInitFunctions(); }
void _initialize_cpu_thread () { debug_on_thread_spawn (); #ifndef XRCORE_STATIC // fpu & sse FPU::m24r (); #endif // XRCORE_STATIC if (CPU::ID.feature&_CPU_FEATURE_SSE) { //_mm_setcsr ( _mm_getcsr() | (_MM_FLUSH_ZERO_ON+_MM_DENORMALS_ZERO_ON) ); _MM_SET_FLUSH_ZERO_MODE (_MM_FLUSH_ZERO_ON); if (_denormals_are_zero_supported) { __try { _MM_SET_DENORMALS_ZERO_MODE (_MM_DENORMALS_ZERO_ON); } __except(EXCEPTION_EXECUTE_HANDLER) { _denormals_are_zero_supported = FALSE; } } }
/**
 * Enables flush-to-zero handling of subnormal values on the calling thread.
 *
 * x86 / x86-64: sets the SSE flush-to-zero mode (skipped under _CRAYC).
 * PowerPC: sets a bit in the Altivec VSCR to select non-IEEE mode.
 * Any other target: no-op.
 */
void set_ftz()
{
#if defined(__i386__) || defined(__x86_64__)
#  if !defined(_CRAYC)
    _MM_SET_FLUSH_ZERO_MODE (_MM_FLUSH_ZERO_ON);
#  endif
#elif defined(__PPC__) || defined(__PPC64__)
    // Altivec non-IEEE mode for subnormal (denormalized) values.
    // m*vscr requires vector types even for writing to registers (disturbing)
    // so the high order bits are index'd.
    vector unsigned short vscr = vec_mfvscr();
    vscr[1] |= 1; // (1<<16) in reg
    vec_mtvscr(vscr);
#endif
}
void TimeLagFilterCore::DlyCalcThread::run(){ //Disable denormalized floats _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); //Start float phasesum, groupsum; float a, b, center, bw, ctrsquared; float phase, group; float w, wsquared, twow, dw; int f, nw; while(!threadShouldExit()){ if(!core.paramsChangedDelay) wait(-1); if(threadShouldExit()) return; { const ScopedReadLock readLock(core.ctLock); //Calculate delays dw = core.getMaxCtr() / ResponseGraph::NUM_RESP_W; w = 0.0f; for(nw = 0; nw < ResponseGraph::NUM_RESP_W; ++nw){ phasesum = 0.0f; groupsum = 0.0f; wsquared = w * w; twow = 2.0f * w; for(f=0; f<core.num_filters; ++f){ center = core.ct_write[f].center; bw = core.ct_write[f].bw; ctrsquared = center * center; a = ctrsquared - wsquared; b = twow * bw * center; phase = -2.0f * atan2(b, a); group = -4.0f * bw * center * (ctrsquared - (core.getMaxBW() * wsquared)) / (a*a + b*b); phasesum += phase; groupsum += group; } core.phasedelay[nw] = phasesum; core.groupdelay[nw] = groupsum; w += dw; } core.paramsChangedDelay = false; } } }
void MixerWorkerThread::run() { // set denormal protection for this thread #ifdef __SSE3__ /* DAZ flag */ _MM_SET_DENORMALS_ZERO_MODE( _MM_DENORMALS_ZERO_ON ); #endif #ifdef __SSE__ /* FTZ flag */ _MM_SET_FLUSH_ZERO_MODE( _MM_FLUSH_ZERO_ON ); #endif QMutex m; while( m_quit == false ) { m.lock(); queueReadyWaitCond->wait( &m ); globalJobQueue.run(); m.unlock(); } }
/// Platform-specific start-up for the demo framework.
///
/// Enables flush-to-zero when IA32 intrinsics and SIMD are configured, asks
/// the scheduler to prefer processor 0 for the current thread and initializes
/// COM in multithreaded mode. The options argument is not used.
extern void HK_CALL DemoPlatformInit(hkDemoFrameworkOptions*)
{
#if defined(HK_COMPILER_HAS_INTRINSICS_IA32) && HK_CONFIG_SIMD == HK_CONFIG_SIMD_ENABLED
	// Flush all denormal/subnormal numbers (2^-1074 to 2^-1022) to zero.
	// Typically operations on denormals are very slow, up to 100 times slower than normal numbers.
	_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
#endif

	// Try to keep on a single core.
	// SetThreadAffinityMask (XP/Vista/Server2003 etc) could be used to be more forceful.
	SetThreadIdealProcessor(GetCurrentThread(), 0);

	// XAudio requires COM init.
	// If the following line does not compile, it means that windows.h was included with
	// a lot of #defines to cut down linkage. A full windows.h include is required, with winnt ver >= 4.
	// In the demos this is done in the PCH, demos.h, before any Havok base windows includes.
	CoInitializeEx(0, COINIT_MULTITHREADED);
}
void FLA_Init() { if ( FLA_initialized == TRUE ) return; FLA_initialized = TRUE; FLA_Error_messages_init(); FLA_Memory_leak_counter_init(); FLA_Init_constants(); FLA_Cntl_init(); #if FLA_VECTOR_INTRINSIC_TYPE == FLA_SSE_INTRINSICS _MM_SET_FLUSH_ZERO_MODE( _MM_FLUSH_ZERO_ON ); #endif #ifdef FLA_ENABLE_SUPERMATRIX FLASH_Queue_init(); #endif }
void RayEngine::embreeInit() { cout << "Starting Embree..." << endl; // Init library Embree.device = rtcNewDevice(NULL); _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); // Generate texture glGenTextures(1, &Embree.texture); glBindTexture(GL_TEXTURE_2D, Embree.texture); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glBindTexture(GL_TEXTURE_2D, 0); // Init scenes userData = this; for (uint i = 0; i < scenes.size(); i++) scenes[i]->embreeInit(Embree.device); }
void CWorld::DoUpdate() { _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); if(m_suspended) return; //ScanHair(); m_lock.lock(); auto currTime = m_useSeperatedClock ? clock()*0.001 : *timeStamp; auto interval = currTime - m_timeLastUpdate; if(interval > TIME_TICK * 0.5) { m_timeLastUpdate = currTime; ScanCell(); //if(m_savedDeltaTime > TIME_TICK_US*2) m_savedDeltaTime = TIME_TICK_US*2; StepWorld(interval); } hkSkyrimMemoryAllocator::releaseAll(); m_lock.unlock(); }
/* exported for Rembedded.h */
/*
 * Configures (start != 0) or restores (start == 0) the floating point
 * environment.
 *
 * On start: clears the FreeBSD FP exception mask, selects the IEEE control
 * word where __setfpucw is needed, and with the Intel compiler (> 8.00) on
 * x86 switches flush-to-zero and denormals-are-zero OFF.
 * On stop: sets the FreeBSD mask to ~0 and restores the default control word.
 */
void fpu_setup(Rboolean start)
{
    if (!start) {
	/* shutting down: put the platform settings back */
#ifdef __FreeBSD__
	fpsetmask(~0);
#endif

#ifdef NEED___SETFPUCW
	__setfpucw(_FPU_DEFAULT);
#endif
	return;
    }

    /* starting up */
#ifdef __FreeBSD__
    fpsetmask(0);
#endif

#ifdef NEED___SETFPUCW
    __setfpucw(_FPU_IEEE);
#endif

#if (defined(__i386) || defined(__x86_64)) && defined(__INTEL_COMPILER) && __INTEL_COMPILER > 800
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF);
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_OFF);
#endif
}
/**
 * Enables the SSE flush-to-zero mode for the calling thread.
 */
void sc_SetDenormalFlags()
{
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
}
int dt_init(int argc, char *argv[], const gboolean init_gui, const gboolean load_data, lua_State *L) { double start_wtime = dt_get_wtime(); #ifndef __WIN32__ if(getuid() == 0 || geteuid() == 0) printf( "WARNING: either your user id or the effective user id are 0. are you running darktable as root?\n"); #endif #if defined(__SSE__) // make everything go a lot faster. _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); #endif dt_set_signal_handlers(); #include "is_supported_platform.h" int sse2_supported = 0; #ifdef HAVE_BUILTIN_CPU_SUPPORTS // NOTE: _may_i_use_cpu_feature() looks better, but only avaliable in ICC __builtin_cpu_init(); sse2_supported = __builtin_cpu_supports("sse2"); #else sse2_supported = dt_detect_cpu_features() & CPU_FLAG_SSE2; #endif if(!sse2_supported) { fprintf(stderr, "[dt_init] SSE2 instruction set is unavailable.\n"); fprintf(stderr, "[dt_init] expect a LOT of functionality to be broken. you have been warned.\n"); } #ifdef M_MMAP_THRESHOLD mallopt(M_MMAP_THRESHOLD, 128 * 1024); /* use mmap() for large allocations */ #endif // make sure that stack/frame limits are good (musl) dt_set_rlimits(); // we have to have our share dir in XDG_DATA_DIRS, // otherwise GTK+ won't find our logo for the about screen (and maybe other things) { const gchar *xdg_data_dirs = g_getenv("XDG_DATA_DIRS"); gchar *new_xdg_data_dirs = NULL; gboolean set_env = TRUE; if(xdg_data_dirs != NULL && *xdg_data_dirs != '\0') { // check if DARKTABLE_SHAREDIR is already in there gboolean found = FALSE; gchar **tokens = g_strsplit(xdg_data_dirs, G_SEARCHPATH_SEPARATOR_S, 0); // xdg_data_dirs is neither NULL nor empty => tokens != NULL for(char **iter = tokens; *iter != NULL; iter++) if(!strcmp(DARKTABLE_SHAREDIR, *iter)) { found = TRUE; break; } g_strfreev(tokens); if(found) set_env = FALSE; else new_xdg_data_dirs = g_strjoin(G_SEARCHPATH_SEPARATOR_S, DARKTABLE_SHAREDIR, xdg_data_dirs, NULL); } else { #ifndef _WIN32 // see http://standards.freedesktop.org/basedir-spec/latest/ar01s03.html 
for a reason to use those as a // default if(!g_strcmp0(DARKTABLE_SHAREDIR, "/usr/local/share") || !g_strcmp0(DARKTABLE_SHAREDIR, "/usr/local/share/") || !g_strcmp0(DARKTABLE_SHAREDIR, "/usr/share") || !g_strcmp0(DARKTABLE_SHAREDIR, "/usr/share/")) new_xdg_data_dirs = g_strdup("/usr/local/share/" G_SEARCHPATH_SEPARATOR_S "/usr/share/"); else new_xdg_data_dirs = g_strdup_printf("%s" G_SEARCHPATH_SEPARATOR_S "/usr/local/share/" G_SEARCHPATH_SEPARATOR_S "/usr/share/", DARKTABLE_SHAREDIR); #else set_env = FALSE; #endif } if(set_env) g_setenv("XDG_DATA_DIRS", new_xdg_data_dirs, 1); g_free(new_xdg_data_dirs); } setlocale(LC_ALL, ""); bindtextdomain(GETTEXT_PACKAGE, DARKTABLE_LOCALEDIR); bind_textdomain_codeset(GETTEXT_PACKAGE, "UTF-8"); textdomain(GETTEXT_PACKAGE); // init all pointers to 0: memset(&darktable, 0, sizeof(darktable_t)); darktable.start_wtime = start_wtime; darktable.progname = argv[0]; // FIXME: move there into dt_database_t dt_pthread_mutex_init(&(darktable.db_insert), NULL); dt_pthread_mutex_init(&(darktable.plugin_threadsafe), NULL); dt_pthread_mutex_init(&(darktable.capabilities_threadsafe), NULL); darktable.control = (dt_control_t *)calloc(1, sizeof(dt_control_t)); // database char *dbfilename_from_command = NULL; char *noiseprofiles_from_command = NULL; char *datadir_from_command = NULL; char *moduledir_from_command = NULL; char *tmpdir_from_command = NULL; char *configdir_from_command = NULL; char *cachedir_from_command = NULL; #ifdef HAVE_OPENCL gboolean exclude_opencl = FALSE; gboolean print_statistics = strcmp(argv[0], "darktable-cltest"); #endif #ifdef USE_LUA char *lua_command = NULL; #endif darktable.num_openmp_threads = 1; #ifdef _OPENMP darktable.num_openmp_threads = omp_get_num_procs(); #endif darktable.unmuted = 0; GSList *config_override = NULL; for(int k = 1; k < argc; k++) { if(argv[k][0] == '-') { if(!strcmp(argv[k], "--help")) { return usage(argv[0]); } if(!strcmp(argv[k], "-h")) { return usage(argv[0]); } else if(!strcmp(argv[k], 
"--version")) { #ifdef USE_LUA const char *lua_api_version = strcmp(LUA_API_VERSION_SUFFIX, "") ? STR(LUA_API_VERSION_MAJOR) "." STR(LUA_API_VERSION_MINOR) "." STR(LUA_API_VERSION_PATCH) "-" LUA_API_VERSION_SUFFIX : STR(LUA_API_VERSION_MAJOR) "." STR(LUA_API_VERSION_MINOR) "." STR(LUA_API_VERSION_PATCH); #endif printf("this is %s\ncopyright (c) 2009-%s johannes hanika\n" PACKAGE_BUGREPORT "\n\ncompile options:\n" " bit depth is %s\n" #ifdef _DEBUG " debug build\n" #else " normal build\n" #endif #if defined(__SSE2__) && defined(__SSE__) " SSE2 optimized codepath enabled\n" #else " SSE2 optimized codepath disabled\n" #endif #ifdef _OPENMP " OpenMP support enabled\n" #else " OpenMP support disabled\n" #endif #ifdef HAVE_OPENCL " OpenCL support enabled\n" #else " OpenCL support disabled\n" #endif #ifdef USE_LUA " Lua support enabled, API version %s\n" #else " Lua support disabled\n" #endif #ifdef USE_COLORDGTK " Colord support enabled\n" #else " Colord support disabled\n" #endif #ifdef HAVE_GPHOTO2 " gPhoto2 support enabled\n" #else " gPhoto2 support disabled\n" #endif #ifdef HAVE_GRAPHICSMAGICK " GraphicsMagick support enabled\n" #else " GraphicsMagick support disabled\n" #endif #ifdef HAVE_OPENEXR " OpenEXR support enabled\n" #else " OpenEXR support disabled\n" #endif , darktable_package_string, darktable_last_commit_year, (sizeof(void *) == 8 ? "64 bit" : sizeof(void *) == 4 ? 
"32 bit" : "unknown") #if USE_LUA , lua_api_version #endif ); return 1; } else if(!strcmp(argv[k], "--library") && argc > k + 1) { dbfilename_from_command = argv[++k]; argv[k-1] = NULL; argv[k] = NULL; } else if(!strcmp(argv[k], "--datadir") && argc > k + 1) { datadir_from_command = argv[++k]; argv[k-1] = NULL; argv[k] = NULL; } else if(!strcmp(argv[k], "--moduledir") && argc > k + 1) { moduledir_from_command = argv[++k]; argv[k-1] = NULL; argv[k] = NULL; } else if(!strcmp(argv[k], "--tmpdir") && argc > k + 1) { tmpdir_from_command = argv[++k]; argv[k-1] = NULL; argv[k] = NULL; } else if(!strcmp(argv[k], "--configdir") && argc > k + 1) { configdir_from_command = argv[++k]; argv[k-1] = NULL; argv[k] = NULL; } else if(!strcmp(argv[k], "--cachedir") && argc > k + 1) { cachedir_from_command = argv[++k]; argv[k-1] = NULL; argv[k] = NULL; } else if(!strcmp(argv[k], "--localedir") && argc > k + 1) { bindtextdomain(GETTEXT_PACKAGE, argv[++k]); argv[k-1] = NULL; argv[k] = NULL; } else if(argv[k][1] == 'd' && argc > k + 1) { if(!strcmp(argv[k + 1], "all")) darktable.unmuted = 0xffffffff; // enable all debug information else if(!strcmp(argv[k + 1], "cache")) darktable.unmuted |= DT_DEBUG_CACHE; // enable debugging for lib/film/cache module else if(!strcmp(argv[k + 1], "control")) darktable.unmuted |= DT_DEBUG_CONTROL; // enable debugging for scheduler module else if(!strcmp(argv[k + 1], "dev")) darktable.unmuted |= DT_DEBUG_DEV; // develop module else if(!strcmp(argv[k + 1], "input")) darktable.unmuted |= DT_DEBUG_INPUT; // input devices else if(!strcmp(argv[k + 1], "camctl")) darktable.unmuted |= DT_DEBUG_CAMCTL; // camera control module else if(!strcmp(argv[k + 1], "perf")) darktable.unmuted |= DT_DEBUG_PERF; // performance measurements else if(!strcmp(argv[k + 1], "pwstorage")) darktable.unmuted |= DT_DEBUG_PWSTORAGE; // pwstorage module else if(!strcmp(argv[k + 1], "opencl")) darktable.unmuted |= DT_DEBUG_OPENCL; // gpu accel via opencl else if(!strcmp(argv[k + 1], 
"sql")) darktable.unmuted |= DT_DEBUG_SQL; // SQLite3 queries else if(!strcmp(argv[k + 1], "memory")) darktable.unmuted |= DT_DEBUG_MEMORY; // some stats on mem usage now and then. else if(!strcmp(argv[k + 1], "lighttable")) darktable.unmuted |= DT_DEBUG_LIGHTTABLE; // lighttable related stuff. else if(!strcmp(argv[k + 1], "nan")) darktable.unmuted |= DT_DEBUG_NAN; // check for NANs when processing the pipe. else if(!strcmp(argv[k + 1], "masks")) darktable.unmuted |= DT_DEBUG_MASKS; // masks related stuff. else if(!strcmp(argv[k + 1], "lua")) darktable.unmuted |= DT_DEBUG_LUA; // lua errors are reported on console else if(!strcmp(argv[k + 1], "print")) darktable.unmuted |= DT_DEBUG_PRINT; // print errors are reported on console else if(!strcmp(argv[k + 1], "camsupport")) darktable.unmuted |= DT_DEBUG_CAMERA_SUPPORT; // camera support warnings are reported on console else return usage(argv[0]); k++; argv[k-1] = NULL; argv[k] = NULL; } else if(argv[k][1] == 't' && argc > k + 1) { darktable.num_openmp_threads = CLAMP(atol(argv[k + 1]), 1, 100); printf("[dt_init] using %d threads for openmp parallel sections\n", darktable.num_openmp_threads); k++; argv[k-1] = NULL; argv[k] = NULL; } else if(!strcmp(argv[k], "--conf") && argc > k + 1) { gchar *keyval = g_strdup(argv[++k]), *c = keyval; argv[k-1] = NULL; argv[k] = NULL; gchar *end = keyval + strlen(keyval); while(*c != '=' && c < end) c++; if(*c == '=' && *(c + 1) != '\0') { *c++ = '\0'; dt_conf_string_entry_t *entry = (dt_conf_string_entry_t *)g_malloc(sizeof(dt_conf_string_entry_t)); entry->key = g_strdup(keyval); entry->value = g_strdup(c); config_override = g_slist_append(config_override, entry); } g_free(keyval); } else if(!strcmp(argv[k], "--noiseprofiles") && argc > k + 1) { noiseprofiles_from_command = argv[++k]; argv[k-1] = NULL; argv[k] = NULL; } else if(!strcmp(argv[k], "--luacmd") && argc > k + 1) { #ifdef USE_LUA lua_command = argv[++k]; #else ++k; #endif argv[k-1] = NULL; argv[k] = NULL; } else 
if(!strcmp(argv[k], "--disable-opencl")) { #ifdef HAVE_OPENCL exclude_opencl = TRUE; #endif argv[k] = NULL; } else if(!strcmp(argv[k], "--")) { // "--" confuses the argument parser of glib/gtk. remove it. argv[k] = NULL; break; } else return usage(argv[0]); // fail on unrecognized options } } // remove the NULLs to not confuse gtk_init() later. for(int i = 1; i < argc; i++) { int k; for(k = i; k < argc; k++) if(argv[k] != NULL) break; if(k > i) { k -= i; for(int j = i + k; j < argc; j++) { argv[j-k] = argv[j]; argv[j] = NULL; } argc -= k; } } if(darktable.unmuted & DT_DEBUG_MEMORY) { fprintf(stderr, "[memory] at startup\n"); dt_print_mem_usage(); } if(init_gui) { // I doubt that connecting to dbus for darktable-cli makes sense darktable.dbus = dt_dbus_init(); // make sure that we have no stale global progress bar visible. thus it's run as early is possible dt_control_progress_init(darktable.control); } #ifdef _OPENMP omp_set_num_threads(darktable.num_openmp_threads); #endif dt_loc_init_datadir(datadir_from_command); dt_loc_init_plugindir(moduledir_from_command); if(dt_loc_init_tmp_dir(tmpdir_from_command)) { fprintf(stderr, "error: invalid temporary directory: %s\n", darktable.tmpdir); return usage(argv[0]); } dt_loc_init_user_config_dir(configdir_from_command); dt_loc_init_user_cache_dir(cachedir_from_command); #ifdef USE_LUA dt_lua_init_early(L); #endif // thread-safe init: dt_exif_init(); char datadir[PATH_MAX] = { 0 }; dt_loc_get_user_config_dir(datadir, sizeof(datadir)); char darktablerc[PATH_MAX] = { 0 }; snprintf(darktablerc, sizeof(darktablerc), "%s/darktablerc", datadir); // initialize the config backend. this needs to be done first... 
darktable.conf = (dt_conf_t *)calloc(1, sizeof(dt_conf_t)); dt_conf_init(darktable.conf, darktablerc, config_override); g_slist_free_full(config_override, g_free); // set the interface language const gchar *lang = dt_conf_get_string("ui_last/gui_language"); #if defined(_WIN32) // get the default locale if no language preference was specified in the config file if(lang == NULL || lang[0] == '\0') { const wchar_t *wcLocaleName = NULL; wcLocaleName = dtwin_get_locale(); if(wcLocaleName != NULL) { gchar *langLocale; langLocale = g_utf16_to_utf8(wcLocaleName, -1, NULL, NULL, NULL); if(langLocale != NULL) { g_free((gchar *)lang); lang = g_strdup(langLocale); } } } #endif // defined (_WIN32) if(lang != NULL && lang[0] != '\0') { g_setenv("LANGUAGE", lang, 1); if(setlocale(LC_ALL, lang) != NULL) gtk_disable_setlocale(); setlocale(LC_MESSAGES, lang); g_setenv("LANG", lang, 1); } g_free((gchar *)lang); // we need this REALLY early so that error messages can be shown, however after gtk_disable_setlocale if(init_gui) { #ifdef GDK_WINDOWING_WAYLAND // There are currently bad interactions with Wayland (drop-downs // are very narrow, scroll events lost). Until this is fixed, give // priority to the XWayland backend for Wayland users. gdk_set_allowed_backends("x11,*"); #endif gtk_init(&argc, &argv); } // detect cpu features and decide which codepaths to enable dt_codepaths_init(); // get the list of color profiles darktable.color_profiles = dt_colorspaces_init(); // initialize the database darktable.db = dt_database_init(dbfilename_from_command, load_data); if(darktable.db == NULL) { printf("ERROR : cannot open database\n"); return 1; } else if(!dt_database_get_lock_acquired(darktable.db)) { gboolean image_loaded_elsewhere = FALSE; #ifndef MAC_INTEGRATION // send the images to the other instance via dbus fprintf(stderr, "trying to open the images in the running instance\n"); GDBusConnection *connection = NULL; for(int i = 1; i < argc; i++) { // make the filename absolute ... 
if(argv[i] == NULL || *argv[i] == '\0') continue; gchar *filename = dt_util_normalize_path(argv[i]); if(filename == NULL) continue; if(!connection) connection = g_bus_get_sync(G_BUS_TYPE_SESSION, NULL, NULL); // ... and send it to the running instance of darktable image_loaded_elsewhere = g_dbus_connection_call_sync(connection, "org.darktable.service", "/darktable", "org.darktable.service.Remote", "Open", g_variant_new("(s)", filename), NULL, G_DBUS_CALL_FLAGS_NONE, -1, NULL, NULL) != NULL; g_free(filename); } if(connection) g_object_unref(connection); #endif if(!image_loaded_elsewhere) dt_database_show_error(darktable.db); return 1; } // Initialize the signal system darktable.signals = dt_control_signal_init(); // Make sure that the database and xmp files are in sync // We need conf and db to be up and running for that which is the case here. // FIXME: is this also useful in non-gui mode? GList *changed_xmp_files = NULL; if(init_gui && dt_conf_get_bool("run_crawler_on_start")) { changed_xmp_files = dt_control_crawler_run(); } if(init_gui) { dt_control_init(darktable.control); } else { if(dbfilename_from_command && !strcmp(dbfilename_from_command, ":memory:")) dt_gui_presets_init(); // init preset db schema. 
darktable.control->running = 0; darktable.control->accelerators = NULL; dt_pthread_mutex_init(&darktable.control->run_mutex, NULL); } // initialize collection query darktable.collection = dt_collection_new(NULL); /* initialize selection */ darktable.selection = dt_selection_new(); /* capabilities set to NULL */ darktable.capabilities = NULL; // Initialize the password storage engine darktable.pwstorage = dt_pwstorage_new(); darktable.guides = dt_guides_init(); #ifdef HAVE_GRAPHICSMAGICK /* GraphicsMagick init */ InitializeMagick(darktable.progname); // *SIGH* dt_set_signal_handlers(); #endif darktable.opencl = (dt_opencl_t *)calloc(1, sizeof(dt_opencl_t)); #ifdef HAVE_OPENCL dt_opencl_init(darktable.opencl, exclude_opencl, print_statistics); #endif darktable.points = (dt_points_t *)calloc(1, sizeof(dt_points_t)); dt_points_init(darktable.points, dt_get_num_threads()); darktable.noiseprofile_parser = dt_noiseprofile_init(noiseprofiles_from_command); // must come before mipmap_cache, because that one will need to access // image dimensions stored in here: darktable.image_cache = (dt_image_cache_t *)calloc(1, sizeof(dt_image_cache_t)); dt_image_cache_init(darktable.image_cache); darktable.mipmap_cache = (dt_mipmap_cache_t *)calloc(1, sizeof(dt_mipmap_cache_t)); dt_mipmap_cache_init(darktable.mipmap_cache); // The GUI must be initialized before the views, because the init() // functions of the views depend on darktable.control->accels_* to register // their keyboard accelerators if(init_gui) { darktable.gui = (dt_gui_gtk_t *)calloc(1, sizeof(dt_gui_gtk_t)); if(dt_gui_gtk_init(darktable.gui)) return 1; dt_bauhaus_init(); } else darktable.gui = NULL; darktable.view_manager = (dt_view_manager_t *)calloc(1, sizeof(dt_view_manager_t)); dt_view_manager_init(darktable.view_manager); // check whether we were able to load darkroom view. if we failed, we'll crash everywhere later on. 
if(!darktable.develop) return 1; darktable.imageio = (dt_imageio_t *)calloc(1, sizeof(dt_imageio_t)); dt_imageio_init(darktable.imageio); // load the darkroom mode plugins once: dt_iop_load_modules_so(); if(init_gui) { #ifdef HAVE_GPHOTO2 // Initialize the camera control. // this is done late so that the gui can react to the signal sent but before switching to lighttable! darktable.camctl = dt_camctl_new(); #endif darktable.lib = (dt_lib_t *)calloc(1, sizeof(dt_lib_t)); dt_lib_init(darktable.lib); dt_gui_gtk_load_config(); // init the gui part of views dt_view_manager_gui_init(darktable.view_manager); // Loading the keybindings char keyfile[PATH_MAX] = { 0 }; // First dump the default keymapping snprintf(keyfile, sizeof(keyfile), "%s/keyboardrc_default", datadir); gtk_accel_map_save(keyfile); // Removing extraneous semi-colons from the default keymap strip_semicolons_from_keymap(keyfile); // Then load any modified keys if available snprintf(keyfile, sizeof(keyfile), "%s/keyboardrc", datadir); if(g_file_test(keyfile, G_FILE_TEST_EXISTS)) gtk_accel_map_load(keyfile); else gtk_accel_map_save(keyfile); // Save the default keymap if none is present // initialize undo struct darktable.undo = dt_undo_init(); } if(darktable.unmuted & DT_DEBUG_MEMORY) { fprintf(stderr, "[memory] after successful startup\n"); dt_print_mem_usage(); } dt_image_local_copy_synch(); /* init lua last, since it's user made stuff it must be in the real environment */ #ifdef USE_LUA dt_lua_init(darktable.lua_state.state, lua_command); #endif if(init_gui) { const char *mode = "lighttable"; // april 1st: you have to earn using dt first! or know that you can switch views with keyboard shortcuts time_t now; time(&now); struct tm lt; localtime_r(&now, <); if(lt.tm_mon == 3 && lt.tm_mday == 1) mode = "knight"; // we have to call dt_ctl_switch_mode_to() here already to not run into a lua deadlock. 
// having another call later is ok dt_ctl_switch_mode_to(mode); #ifndef MAC_INTEGRATION // load image(s) specified on cmdline. // this has to happen after lua is initialized as image import can run lua code // If only one image is listed, attempt to load it in darkroom int last_id = 0; gboolean only_single_images = TRUE; int loaded_images = 0; for(int i = 1; i < argc; i++) { gboolean single_image = FALSE; if(argv[i] == NULL || *argv[i] == '\0') continue; int new_id = dt_load_from_string(argv[i], FALSE, &single_image); if(new_id > 0) { last_id = new_id; loaded_images++; if(!single_image) only_single_images = FALSE; } } if(loaded_images == 1 && only_single_images) { dt_control_set_mouse_over_id(last_id); dt_ctl_switch_mode_to("darkroom"); } #endif } // last but not least construct the popup that asks the user about images whose xmp files are newer than the // db entry if(init_gui && changed_xmp_files) { dt_control_crawler_show_image_list(changed_xmp_files); } dt_print(DT_DEBUG_CONTROL, "[init] startup took %f seconds\n", dt_get_wtime() - start_wtime); return 0; }
/// Enable flush-to-zero (FTZ) and denormals-are-zero (DAZ) in the MXCSR
/// control/status register, so that SSE arithmetic treats denormal results
/// and denormal inputs as zero.
///
/// FTZ is set through the <xmmintrin.h> helper macro; DAZ is set by OR-ing
/// its bit into the current MXCSR value, leaving every other bit untouched.
void sc_SetDenormalFlags()
{
    // FTZ: denormal results are flushed to zero.
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);

    // DAZ: denormal inputs are read as zero. This is bit 6 of MXCSR; it is
    // set with a plain read-modify-write so that only the SSE1 intrinsics
    // (_mm_getcsr / _mm_setcsr) are required.
    const unsigned int kMxcsrDazBit = 0x40;
    _mm_setcsr(_mm_getcsr() | kMxcsrDazBit);
}
// Entry point executed by every worker thread of vHavokCpuJobThreadPool.
//
// v points at this worker's WorkerThreadData. The function sets up the
// per-thread Havok state, then alternates between waiting on the worker's
// semaphore and draining the shared job queue until m_killThread is set,
// and finally tears the per-thread state down again. Always returns 0.
static void* HK_CALL hkWorkerThreadFunc(void *v)
{
	vHavokCpuJobThreadPool::WorkerThreadData& worker = *static_cast<vHavokCpuJobThreadPool::WorkerThreadData*>(v);
	vHavokCpuJobThreadPool::SharedThreadData& shared = *worker.m_sharedThreadData;

	HK_THREAD_LOCAL_SET( hkThreadNumber, worker.m_threadId );

#if defined(HK_COMPILER_HAS_INTRINSICS_IA32) && HK_CONFIG_SIMD == HK_CONFIG_SIMD_ENABLED
	// Flush all denormal/subnormal numbers (2^-1074 to 2^-1022) to zero.
	// Operations on denormals are typically very slow, up to 100 times
	// slower than on normal numbers.
	_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
#endif

	// Hint the scheduler towards the hardware thread assigned to this worker.
#ifdef HK_PLATFORM_XBOX360
	XSetThreadProcessor(GetCurrentThread(), worker.m_hardwareThreadId );
#elif defined(HK_PLATFORM_WIN32) && !defined(_VISION_WINRT)
	// SetThreadAffinityMask could be used instead to be more forceful.
	SetThreadIdealProcessor(GetCurrentThread(), worker.m_hardwareThreadId);
#endif

	// Per-thread memory and base system setup.
	hkMemoryRouter router;
	hkMemorySystem::getInstance().threadInit( router, "vHavokCpuJobThreadPool" );
	hkBaseSystem::initThread( &router );

	//hkUint32 allLockVal = 0;
	//hkReferencedObject::initThread(&allLockVal);

	if (shared.m_timerBufferAllocation > 0)
	{
		// Giving this thread a monitor stream is what enables timers.
		hkMonitorStream::getInstance().resize(shared.m_timerBufferAllocation);
	}
	worker.m_monitorStreamBegin = hkMonitorStream::getInstance().getStart();
	worker.m_monitorStreamEnd = hkMonitorStream::getInstance().getEnd();

	hkCheckDeterminismUtil::initThread();

	// VISION specific: notify listeners that a worker thread was created
	if (shared.m_OnWorkerThreadCreatedPtr)
	{
		shared.m_CallbackProtect.enter();
		shared.m_OnWorkerThreadCreatedPtr->TriggerCallbacks( /* data needed? */);
		shared.m_CallbackProtect.leave();
	}
	// END VISION specific

	// Block until the main thread releases this worker for the first time.
	worker.m_semaphore.acquire();

	// Thread main loop: one iteration per release by the main thread.
	while (worker.m_killThread == false)
	{
		if (worker.m_clearTimers)
		{
			hkMonitorStream::getInstance().reset();
			worker.m_monitorStreamEnd = hkMonitorStream::getInstance().getEnd();
			worker.m_clearTimers = false;
		}

		// Worker threads report themselves with the "is primary" flag cleared.
		hkCheckDeterminismUtil::workerThreadStartFrame(false);

		// Critical-section timers are only enabled while jobs are processed.
		hkCriticalSection::setTimersEnabled();
		shared.m_jobQueue->processAllJobs();
		hkCriticalSection::setTimersDisabled();

		// Remember how far the timer data collected in this step reaches.
		worker.m_monitorStreamEnd = hkMonitorStream::getInstance().getEnd();

		hkCheckDeterminismUtil::workerThreadFinishFrame();

		if( shared.m_gcThreadMemoryOnCompletion )
		{
			hkMemorySystem::getInstance().garbageCollectThread( router );
		}

		// Wake whoever (usually the main thread) waits for all workers to finish ...
		shared.m_workerThreadFinished.release();

		// ... and go straight back to sleep until released again.
		worker.m_semaphore.acquire();
	}

	// Cleanup.

	// VISION specific: notify listeners that this worker thread is finishing
	if (shared.m_OnWorkerThreadFinishedPtr)
	{
		shared.m_CallbackProtect.enter();
		shared.m_OnWorkerThreadFinishedPtr->TriggerCallbacks( /* data needed? */);
		shared.m_CallbackProtect.leave();
	}
	// END VISION specific

	hkCheckDeterminismUtil::quitThread();
	hkBaseSystem::quitThread();
	hkMemorySystem::getInstance().threadQuit( router );

	// Final signal so that a thread waiting for shutdown can proceed.
	shared.m_workerThreadFinished.release();

	return 0;
}
int main() { printf("\n"); printf("\n"); printf("\n"); printf(" HPMPC -- Library for High-Performance implementation of solvers for MPC.\n"); printf(" Copyright (C) 2014 by Technical University of Denmark. All rights reserved.\n"); printf("\n"); printf(" HPMPC is distributed in the hope that it will be useful,\n"); printf(" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"); printf(" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"); printf(" See the GNU Lesser General Public License for more details.\n"); printf("\n"); printf("\n"); printf("\n"); printf("Riccati solver performance test - single precision\n"); printf("\n"); // maximum frequency of the processor const float GHz_max = 2.9; //3.6; //2.9; printf("Frequency used to compute theoretical peak: %5.1f GHz (edit test_dricposv.c to modify this value).\n", GHz_max); printf("\n"); // maximum flops per cycle, single precision #if defined(TARGET_X64_AVX) const float flops_max = 16; printf("Testing solvers for AVX instruction set, 64 bit: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_X64_SSE3) || defined(TARGET_AMD_SSE3) const float flops_max = 8; printf("Testing solvers for SSE3 instruction set, 64 bit: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_CORTEXA9) const float flops_max = 4; printf("Testing solvers for ARMv7a NEON instruction set: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_X86_ATOM) const float flops_max = 4; printf("Testing solvers for SSE3 instruction set, 32 bit, optimized for Intel Atom: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_POWERPC_G2) const float flops_max = 2; printf("Testing solvers for POWERPC instruction set, 32 bit: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_C99_4X4) const float flops_max = 2; printf("Testing reference solvers, 4x4 kernel: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif 
defined(TARGET_C99_2X2) const float flops_max = 2; printf("Testing reference solvers, 2x2 kernel: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #endif printf("\n"); printf("Tested solvers:\n"); printf("-sv : Riccati factorization and system solution (prediction step in IP methods)\n"); printf("-trs: system solution after a previous call to Riccati factorization (correction step in IP methods)\n"); printf("\n"); printf("\n"); #if defined(TARGET_X64_AVX) || defined(TARGET_X64_SSE3) || defined(TARGET_X86_ATOM) || defined(TARGET_AMD_SSE3) printf("\nflush to zero on\n"); _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); // flush to zero subnormals !!! works only with one thread !!! #endif // to throw floating-point exception /*#ifndef __APPLE__*/ /* feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);*/ /*#endif*/ int err; int i, j, ii, jj, idx; const int bsd = D_MR; //d_get_mr(); const int bss = S_MR; //s_get_mr(); int info = 0; int nn[] = {4, 6, 8, 10, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92, 96, 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152, 156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204, 208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 248, 252, 256, 260, 264, 268, 272, 276, 280, 284, 288, 292, 296, 300}; int nnrep[] = {10000, 10000, 10000, 10000, 10000, 4000, 4000, 2000, 2000, 1000, 1000, 400, 400, 400, 200, 200, 200, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 40, 40, 40, 40, 40, 20, 20, 20, 20, 20, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10}; int vnx[] = {8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 512, 1024}; int vnrep[] = {100, 100, 100, 100, 100, 100, 50, 50, 50, 20, 10, 10}; int vN[] = {4, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256}; int ll; for(ll=0; ll<77; ll++) /* for(ll=0; ll<1; ll++)*/ { int nx = nn[ll];//NX;//16;//nn[ll]; // number of 
states (it has to be even for the mass-spring system test problem) int nu = 2;//NU;//5; // number of inputs (controllers) (it has to be at least 1 and at most nx/2 for the mass-spring system test problem) int N = 10;//NN;//10; // horizon lenght int nrep = nnrep[ll]; /* int nx = NX;//16;//nn[ll]; // number of states (it has to be even for the mass-spring system test problem)*/ /* int nu = NU;//5; // number of inputs (controllers) (it has to be at least 1 and at most nx/2 for the mass-spring system test problem)*/ /* int N = NN;//10; // horizon lenght*/ /* int nrep = NREP;*/ int rep; int nz = nx+nu+1; int pnz = bss*((nz+bss-nu%bss+bss-1)/bss); /************************************************ * dynamical system ************************************************/ double *A; d_zeros(&A, nx, nx); // states update matrix double *B; d_zeros(&B, nx, nu); // inputs matrix double *b; d_zeros(&b, nx, 1); // states offset double *x0; d_zeros(&x0, nx, 1); // initial state double Ts = 0.5; // sampling time mass_spring_system(Ts, nx, nu, N, A, B, b, x0); for(jj=0; jj<nx; jj++) b[jj] = 0.1; for(jj=0; jj<nx; jj++) x0[jj] = 0; x0[0] = 3.5; x0[1] = 3.5; // d_print_mat(nx, nx, A, nx); // d_print_mat(nx, nu, B, nx); // d_print_mat(nx, 1, b, nx); // d_print_mat(nx, 1, x0, nx); /* packed */ double *BAb; d_zeros(&BAb, nx, nz); dmcopy(nx, nu, B, nx, BAb, nx); dmcopy(nx, nx, A, nx, BAb+nu*nx, nx); dmcopy(nx, 1 , b, nx, BAb+(nu+nx)*nx, nx); // d_print_mat(nx, nx+nu+1, BAb, nx); /* transposed */ double *BAbt; d_zeros_align(&BAbt, pnz, pnz); for(ii=0; ii<nx; ii++) for(jj=0; jj<nz; jj++) { BAbt[jj+pnz*ii] = BAb[ii+nx*jj]; } // d_print_mat(nz, nx+1, BAbt, pnz); // s_print_mat(nz, nx+1, sBAbt, pnz); // return 0; /* packed into contiguous memory */ double *pBAbt; d_zeros_align(&pBAbt, pnz, pnz); d_cvt_mat2pmat(nz, nx, 0, bsd, BAbt, pnz, pBAbt, pnz); float *psBAbt; s_zeros_align(&psBAbt, pnz, pnz); s_cvt_d2s_pmat(nz, nx, bsd, pBAbt, pnz, bss, psBAbt, pnz); // d_print_pmat(nz, nx, bsd, pBAbt, pnz); // 
s_print_pmat(nz, nx, bss, spBAbt, pnz); /************************************************ * cost function ************************************************/ double *Q; d_zeros_align(&Q, pnz, pnz); for(ii=0; ii<nu; ii++) Q[ii*(pnz+1)] = 2.0; for(; ii<pnz; ii++) Q[ii*(pnz+1)] = 1.0; for(ii=0; ii<nz; ii++) Q[nx+nu+ii*pnz] = 1.0; Q[(nx+nu)*(pnz+1)] = 1e6; /* packed into contiguous memory */ float *pQ; s_zeros_align(&pQ, pnz, pnz); cvt_d2s_mat2pmat(nz, nz, 0, bss, Q, pnz, pQ, pnz); /* matrices series */ float *(hpQ[N+1]); float *(hq[N+1]); float *(hux[N+1]); float *(hpi[N+1]); float *(hpBAbt[N]); float *(hrb[N]); float *(hrq[N+1]); for(jj=0; jj<N; jj++) { s_zeros_align(&hpQ[jj], pnz, pnz); s_zeros_align(&hq[jj], pnz, 1); s_zeros_align(&hux[jj], pnz, 1); s_zeros_align(&hpi[jj], nx, 1); hpBAbt[jj] = psBAbt; s_zeros_align(&hrb[jj], nx, 1); s_zeros_align(&hrq[jj], nx+nu, 1); } s_zeros_align(&hpQ[N], pnz, pnz); s_zeros_align(&hq[N], pnz, 1); s_zeros_align(&hux[N], pnz, 1); s_zeros_align(&hpi[N], nx, 1); s_zeros_align(&hrq[N], nx+nu, 1); // starting guess for(jj=0; jj<nx; jj++) hux[0][nu+jj] = (float) x0[jj]; float *pL; s_zeros_align(&pL, pnz, pnz); float *pBAbtL; s_zeros_align(&pBAbtL, pnz, pnz); /************************************************ * riccati-like iteration ************************************************/ // predictor // restore cost function for(ii=0; ii<N; ii++) { for(jj=0; jj<pnz*pnz; jj++) hpQ[ii][jj]=pQ[jj]; } for(jj=0; jj<pnz*pnz; jj++) hpQ[N][jj]=pQ[jj]; // call the solver sricposv_mpc(nx, nu, N, pnz, hpBAbt, hpQ, hux, pL, pBAbtL, COMPUTE_MULT, hpi, &info); if(PRINTRES==1) { /* print result */ printf("\n\nsv\n\n"); for(ii=0; ii<N; ii++) s_print_mat(1, nu, hux[ii], 1); } if(PRINTRES==1 && COMPUTE_MULT==1) { // print result printf("\n\nsv\n\n"); for(ii=0; ii<N; ii++) s_print_mat(1, nx, hpi[ii+1], 1); } // corrector // clear solution for(ii=0; ii<N; ii++) { for(jj=0; jj<nu; jj++) hux[ii][jj] = 0; for(jj=0; jj<nx; jj++) hux[ii+1][nu+jj] = 0; } // restore 
linear part of cost function for(ii=0; ii<N; ii++) { for(jj=0; jj<nx+nu; jj++) hq[ii][jj] = Q[nx+nu+pnz*jj]; } for(jj=0; jj<nx+nu; jj++) hq[N][jj] = Q[nx+nu+pnz*jj]; // call the solver sricpotrs_mpc(nx, nu, N, pnz, hpBAbt, hpQ, hq, hux, pBAbtL, COMPUTE_MULT, hpi); if(PRINTRES==1) { // print result printf("\n\ntrs\n\n"); for(ii=0; ii<N; ii++) s_print_mat(1, nu, hux[ii], 1); } if(PRINTRES==1 && COMPUTE_MULT==1) { // print result printf("\n\ntrs\n\n"); for(ii=0; ii<N; ii++) s_print_mat(1, nx, hpi[ii+1], 1); } // restore cost function for(ii=0; ii<N; ii++) { for(jj=0; jj<pnz*pnz; jj++) hpQ[ii][jj]=pQ[jj]; } for(jj=0; jj<pnz*pnz; jj++) hpQ[N][jj]=pQ[jj]; // restore linear part of cost function for(ii=0; ii<N; ii++) { for(jj=0; jj<nx+nu; jj++) hq[ii][jj] = Q[nx+nu+pnz*jj]; } for(jj=0; jj<nx+nu; jj++) hq[N][jj] = Q[nx+nu+pnz*jj]; // residuals computation sres(nx, nu, N, pnz, hpBAbt, hpQ, hq, hux, hpi, hrq, hrb); if(PRINTRES==1 && COMPUTE_MULT==1) { // print result printf("\n\nres\n\n"); for(ii=0; ii<+N; ii++) s_print_mat(1, nx+nu, hrq[ii], 1); for(ii=0; ii<N; ii++) s_print_mat(1, nx, hrb[ii], 1); } // timing struct timeval tv0, tv1, tv2; gettimeofday(&tv0, NULL); // start // double precision for(rep=0; rep<nrep; rep++) { // restore cost function for(ii=0; ii<N; ii++) { for(jj=0; jj<pnz*pnz; jj++) hpQ[ii][jj]=pQ[jj]; } for(jj=0; jj<pnz*pnz; jj++) hpQ[N][jj]=pQ[jj]; // call the solver sricposv_mpc(nx, nu, N, pnz, hpBAbt, hpQ, hux, pL, pBAbtL, COMPUTE_MULT, hpi, &info); } gettimeofday(&tv1, NULL); // start for(rep=0; rep<nrep; rep++) { // clear solution for(ii=0; ii<N; ii++) { for(jj=0; jj<nu; jj++) hux[ii][jj] = 0; for(jj=0; jj<nx; jj++) hux[ii+1][nu+jj] = 0; } // restore linear part of cost function for(ii=0; ii<N; ii++) { for(jj=0; jj<nx+nu; jj++) hq[ii][jj] = Q[nx+nu+pnz*jj]; } for(jj=0; jj<nx+nu; jj++) hq[N][jj] = Q[nx+nu+pnz*jj]; // call the solver sricpotrs_mpc(nx, nu, N, pnz, hpBAbt, hpQ, hq, hux, pBAbtL, COMPUTE_MULT, hpi); } gettimeofday(&tv2, NULL); // start float 
time_sv = (float) (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6); float flop_sv = (1.0/3.0*nx*nx*nx+3.0/2.0*nx*nx) + N*(7.0/3.0*nx*nx*nx+4.0*nx*nx*nu+2.0*nx*nu*nu+1.0/3.0*nu*nu*nu+13.0/2.0*nx*nx+9.0*nx*nu+5.0/2.0*nu*nu); if(COMPUTE_MULT==1) flop_sv += N*2*nx*nx; float Gflops_sv = 1e-9*flop_sv/time_sv; float time_trs = (float) (tv2.tv_sec-tv1.tv_sec)/(nrep+0.0)+(tv2.tv_usec-tv1.tv_usec)/(nrep*1e6); float flop_trs = N*(8.0*nx*nx+8.0*nx*nu+2.0*nu*nu); if(COMPUTE_MULT==1) flop_trs += N*2*nx*nx; float Gflops_trs = 1e-9*flop_trs/time_trs; float Gflops_max = flops_max * GHz_max; if(ll==0) printf("\nnx\tnu\tN\tsv time\t\tsv Gflops\tsv \%\t\ttrs time\ttrs Gflops\ttrs \%\n\n"); printf("%d\t%d\t%d\t%e\t%f\t%f\t%e\t%f\t%f\n", nx, nu, N, time_sv, Gflops_sv, 100.0*Gflops_sv/Gflops_max, time_trs, Gflops_trs, 100.0*Gflops_trs/Gflops_max); /************************************************ * return ************************************************/ free(A); free(B); free(b); free(x0); free(BAb); free(BAbt); free(pBAbt); free(Q); free(pQ); free(pL); free(pBAbtL); for(jj=0; jj<N; jj++) { free(hpQ[jj]); free(hq[jj]); free(hux[jj]); free(hpi[jj]); } free(hpQ[N]); free(hq[N]); free(hux[N]); free(hpi[N]); } // increase size printf("\n"); printf("\n"); printf("\n"); return 0; }
int dt_init(int argc, char *argv[], const int init_gui,lua_State *L) { #ifndef __WIN32__ if(getuid() == 0 || geteuid() == 0) printf("WARNING: either your user id or the effective user id are 0. are you running darktable as root?\n"); #endif // make everything go a lot faster. _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); #if !defined __APPLE__ && !defined __WIN32__ _dt_sigsegv_old_handler = signal(SIGSEGV,&_dt_sigsegv_handler); #endif #ifndef __GNUC_PREREQ // on OSX, gcc-4.6 and clang chokes if this is not here. #if defined __GNUC__ && defined __GNUC_MINOR__ # define __GNUC_PREREQ(maj, min) \ ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) #else # define __GNUC_PREREQ(maj, min) 0 #endif #endif #ifndef __has_builtin // http://clang.llvm.org/docs/LanguageExtensions.html#feature-checking-macros #define __has_builtin(x) false #endif #ifndef __SSE3__ #error "Unfortunately we depend on SSE3 instructions at this time." #error "Please contribute a backport patch (or buy a newer processor)." #else #if (__GNUC_PREREQ(4,8) || __has_builtin(__builtin_cpu_supports)) //FIXME: check will work only in GCC 4.8+ !!! implement manual cpuid check !!! //NOTE: _may_i_use_cpu_feature() looks better, but only avaliable in ICC if (!__builtin_cpu_supports("sse3")) { fprintf(stderr, "[dt_init] unfortunately we depend on SSE3 instructions at this time.\n"); fprintf(stderr, "[dt_init] please contribute a backport patch (or buy a newer processor).\n"); return 1; } #else //FIXME: no way to check for SSE3 in runtime, implement manual cpuid check !!! 
#endif #endif #ifdef M_MMAP_THRESHOLD mallopt(M_MMAP_THRESHOLD,128*1024) ; /* use mmap() for large allocations */ #endif // we have to have our share dir in XDG_DATA_DIRS, // otherwise GTK+ won't find our logo for the about screen (and maybe other things) { const gchar *xdg_data_dirs = g_getenv("XDG_DATA_DIRS"); gchar *new_xdg_data_dirs = NULL; gboolean set_env = TRUE; if(xdg_data_dirs != NULL && *xdg_data_dirs != '\0') { // check if DARKTABLE_SHAREDIR is already in there gboolean found = FALSE; gchar **tokens = g_strsplit(xdg_data_dirs, ":", 0); // xdg_data_dirs is neither NULL nor empty => tokens != NULL for(char **iter = tokens; *iter != NULL; iter++) if(!strcmp(DARKTABLE_SHAREDIR, *iter)) { found = TRUE; break; } g_strfreev(tokens); if(found) set_env = FALSE; else new_xdg_data_dirs = g_strjoin(":", DARKTABLE_SHAREDIR, xdg_data_dirs, NULL); } else { // see http://standards.freedesktop.org/basedir-spec/latest/ar01s03.html for a reason to use those as a default if(!g_strcmp0(DARKTABLE_SHAREDIR, "/usr/local/share") || !g_strcmp0(DARKTABLE_SHAREDIR, "/usr/local/share/") || !g_strcmp0(DARKTABLE_SHAREDIR, "/usr/share") || !g_strcmp0(DARKTABLE_SHAREDIR, "/usr/share/")) new_xdg_data_dirs = g_strdup("/usr/local/share/:/usr/share/"); else new_xdg_data_dirs = g_strdup_printf("%s:/usr/local/share/:/usr/share/", DARKTABLE_SHAREDIR); } if(set_env) g_setenv("XDG_DATA_DIRS", new_xdg_data_dirs, 1); g_free(new_xdg_data_dirs); } setlocale(LC_ALL, ""); bindtextdomain (GETTEXT_PACKAGE, DARKTABLE_LOCALEDIR); bind_textdomain_codeset (GETTEXT_PACKAGE, "UTF-8"); textdomain (GETTEXT_PACKAGE); // init all pointers to 0: memset(&darktable, 0, sizeof(darktable_t)); darktable.progname = argv[0]; // database gchar *dbfilename_from_command = NULL; char *datadir_from_command = NULL; char *moduledir_from_command = NULL; char *tmpdir_from_command = NULL; char *configdir_from_command = NULL; char *cachedir_from_command = NULL; #ifdef USE_LUA char *lua_command = NULL; #endif 
darktable.num_openmp_threads = 1; #ifdef _OPENMP darktable.num_openmp_threads = omp_get_num_procs(); #endif darktable.unmuted = 0; GSList *images_to_load = NULL, *config_override = NULL; for(int k=1; k<argc; k++) { if(argv[k][0] == '-') { if(!strcmp(argv[k], "--help")) { return usage(argv[0]); } if(!strcmp(argv[k], "-h")) { return usage(argv[0]); } else if(!strcmp(argv[k], "--version")) { printf("this is "PACKAGE_STRING"\ncopyright (c) 2009-2014 johannes hanika\n"PACKAGE_BUGREPORT"\n" #ifdef _OPENMP "OpenMP support enabled\n" #else "OpenMP support disabled\n" #endif ); return 1; } else if(!strcmp(argv[k], "--library")) { dbfilename_from_command = argv[++k]; } else if(!strcmp(argv[k], "--datadir")) { datadir_from_command = argv[++k]; } else if(!strcmp(argv[k], "--moduledir")) { moduledir_from_command = argv[++k]; } else if(!strcmp(argv[k], "--tmpdir")) { tmpdir_from_command = argv[++k]; } else if(!strcmp(argv[k], "--configdir")) { configdir_from_command = argv[++k]; } else if(!strcmp(argv[k], "--cachedir")) { cachedir_from_command = argv[++k]; } else if(!strcmp(argv[k], "--localedir")) { bindtextdomain (GETTEXT_PACKAGE, argv[++k]); } else if(argv[k][1] == 'd' && argc > k+1) { if(!strcmp(argv[k+1], "all")) darktable.unmuted = 0xffffffff; // enable all debug information else if(!strcmp(argv[k+1], "cache")) darktable.unmuted |= DT_DEBUG_CACHE; // enable debugging for lib/film/cache module else if(!strcmp(argv[k+1], "control")) darktable.unmuted |= DT_DEBUG_CONTROL; // enable debugging for scheduler module else if(!strcmp(argv[k+1], "dev")) darktable.unmuted |= DT_DEBUG_DEV; // develop module else if(!strcmp(argv[k+1], "fswatch")) darktable.unmuted |= DT_DEBUG_FSWATCH; // fswatch module else if(!strcmp(argv[k+1], "input")) darktable.unmuted |= DT_DEBUG_INPUT; // input devices else if(!strcmp(argv[k+1], "camctl")) darktable.unmuted |= DT_DEBUG_CAMCTL; // camera control module else if(!strcmp(argv[k+1], "perf")) darktable.unmuted |= DT_DEBUG_PERF; // performance 
measurements else if(!strcmp(argv[k+1], "pwstorage")) darktable.unmuted |= DT_DEBUG_PWSTORAGE; // pwstorage module else if(!strcmp(argv[k+1], "opencl")) darktable.unmuted |= DT_DEBUG_OPENCL; // gpu accel via opencl else if(!strcmp(argv[k+1], "sql")) darktable.unmuted |= DT_DEBUG_SQL; // SQLite3 queries else if(!strcmp(argv[k+1], "memory")) darktable.unmuted |= DT_DEBUG_MEMORY; // some stats on mem usage now and then. else if(!strcmp(argv[k+1], "lighttable")) darktable.unmuted |= DT_DEBUG_LIGHTTABLE; // lighttable related stuff. else if(!strcmp(argv[k+1], "nan")) darktable.unmuted |= DT_DEBUG_NAN; // check for NANs when processing the pipe. else if(!strcmp(argv[k+1], "masks")) darktable.unmuted |= DT_DEBUG_MASKS; // masks related stuff. else if(!strcmp(argv[k+1], "lua")) darktable.unmuted |= DT_DEBUG_LUA; // lua errors are reported on console else return usage(argv[0]); k ++; } else if(argv[k][1] == 't' && argc > k+1) { darktable.num_openmp_threads = CLAMP(atol(argv[k+1]), 1, 100); printf("[dt_init] using %d threads for openmp parallel sections\n", darktable.num_openmp_threads); k ++; } else if(!strcmp(argv[k], "--conf")) { gchar *keyval = g_strdup(argv[++k]), *c = keyval; gchar *end = keyval + strlen(keyval); while(*c != '=' && c < end) c++; if(*c == '=' && *(c+1) != '\0') { *c++ = '\0'; dt_conf_string_entry_t *entry = (dt_conf_string_entry_t*)g_malloc(sizeof(dt_conf_string_entry_t)); entry->key = g_strdup(keyval); entry->value = g_strdup(c); config_override = g_slist_append(config_override, entry); } g_free(keyval); } else if(!strcmp(argv[k], "--luacmd")) { #ifdef USE_LUA lua_command = argv[++k]; #else ++k; #endif } } #ifndef MAC_INTEGRATION else { images_to_load = g_slist_append(images_to_load, argv[k]); } #endif } if(darktable.unmuted & DT_DEBUG_MEMORY) { fprintf(stderr, "[memory] at startup\n"); dt_print_mem_usage(); } #ifdef _OPENMP omp_set_num_threads(darktable.num_openmp_threads); #endif dt_loc_init_datadir(datadir_from_command); 
dt_loc_init_plugindir(moduledir_from_command); if(dt_loc_init_tmp_dir(tmpdir_from_command)) { printf(_("ERROR : invalid temporary directory : %s\n"),darktable.tmpdir); return usage(argv[0]); } dt_loc_init_user_config_dir(configdir_from_command); dt_loc_init_user_cache_dir(cachedir_from_command); #if !GLIB_CHECK_VERSION(2, 35, 0) g_type_init(); #endif // does not work, as gtk is not inited yet. // even if it were, it's a super bad idea to invoke gtk stuff from // a signal handler. /* check cput caps */ // dt_check_cpu(argc,argv); #ifdef HAVE_GEGL char geglpath[PATH_MAX]; char datadir[PATH_MAX]; dt_loc_get_datadir(datadir, sizeof(datadir)); snprintf(geglpath, sizeof(geglpath), "%s/gegl:/usr/lib/gegl-0.0", datadir); (void)setenv("GEGL_PATH", geglpath, 1); gegl_init(&argc, &argv); #endif #ifdef USE_LUA dt_lua_init_early(L); #endif // thread-safe init: dt_exif_init(); char datadir[PATH_MAX]; dt_loc_get_user_config_dir (datadir, sizeof(datadir)); char filename[PATH_MAX]; snprintf(filename, sizeof(filename), "%s/darktablerc", datadir); // initialize the config backend. this needs to be done first... darktable.conf = (dt_conf_t *)calloc(1, sizeof(dt_conf_t)); dt_conf_init(darktable.conf, filename, config_override); g_slist_free_full(config_override, g_free); // set the interface language const gchar* lang = dt_conf_get_string("ui_last/gui_language"); // we may not g_free 'lang' since it is owned by setlocale afterwards if(lang != NULL && lang[0] != '\0') { if(setlocale(LC_ALL, lang) != NULL) gtk_disable_setlocale(); } // initialize the database darktable.db = dt_database_init(dbfilename_from_command); if(darktable.db == NULL) { printf("ERROR : cannot open database\n"); return 1; } else if(!dt_database_get_lock_acquired(darktable.db)) { // send the images to the other instance via dbus if(images_to_load) { GSList *p = images_to_load; // get a connection! 
GDBusConnection *connection = g_bus_get_sync(G_BUS_TYPE_SESSION,NULL, NULL); while (p != NULL) { // make the filename absolute ... gchar *filename = dt_make_path_absolute((gchar*)p->data); if(filename == NULL) continue; // ... and send it to the running instance of darktable g_dbus_connection_call_sync(connection, "org.darktable.service", "/darktable", "org.darktable.service.Remote", "Open", g_variant_new ("(s)", filename), NULL, G_DBUS_CALL_FLAGS_NONE, -1, NULL, NULL); p = g_slist_next(p); g_free(filename); } g_slist_free(images_to_load); g_object_unref(connection); } return 1; } // Initialize the signal system darktable.signals = dt_control_signal_init(); // Make sure that the database and xmp files are in sync before starting the fswatch. // We need conf and db to be up and running for that which is the case here. // FIXME: is this also useful in non-gui mode? GList *changed_xmp_files = NULL; if(init_gui && dt_conf_get_bool("run_crawler_on_start")) { changed_xmp_files = dt_control_crawler_run(); } // Initialize the filesystem watcher darktable.fswatch=dt_fswatch_new(); #ifdef HAVE_GPHOTO2 // Initialize the camera control darktable.camctl=dt_camctl_new(); #endif // get max lighttable thumbnail size: darktable.thumbnail_width = CLAMPS(dt_conf_get_int("plugins/lighttable/thumbnail_width"), 200, 3000); darktable.thumbnail_height = CLAMPS(dt_conf_get_int("plugins/lighttable/thumbnail_height"), 200, 3000); // and make sure it can be mip-mapped all the way from mip4 to mip0 darktable.thumbnail_width /= 16; darktable.thumbnail_width *= 16; darktable.thumbnail_height /= 16; darktable.thumbnail_height *= 16; // Initialize the password storage engine darktable.pwstorage=dt_pwstorage_new(); // FIXME: move there into dt_database_t dt_pthread_mutex_init(&(darktable.db_insert), NULL); dt_pthread_mutex_init(&(darktable.plugin_threadsafe), NULL); dt_pthread_mutex_init(&(darktable.capabilities_threadsafe), NULL); darktable.control = (dt_control_t *)calloc(1, 
sizeof(dt_control_t)); if(init_gui) { dt_control_init(darktable.control); } else { if(dbfilename_from_command && !strcmp(dbfilename_from_command, ":memory:")) dt_gui_presets_init(); // init preset db schema. darktable.control->running = 0; darktable.control->accelerators = NULL; dt_pthread_mutex_init(&darktable.control->run_mutex, NULL); } // initialize collection query darktable.collection_listeners = NULL; darktable.collection = dt_collection_new(NULL); /* initialize selection */ darktable.selection = dt_selection_new(); /* capabilities set to NULL */ darktable.capabilities = NULL; #ifdef HAVE_GRAPHICSMAGICK /* GraphicsMagick init */ InitializeMagick(darktable.progname); #endif darktable.opencl = (dt_opencl_t *)calloc(1, sizeof(dt_opencl_t)); #ifdef HAVE_OPENCL dt_opencl_init(darktable.opencl, argc, argv); #endif darktable.blendop = (dt_blendop_t *)calloc(1, sizeof(dt_blendop_t)); dt_develop_blend_init(darktable.blendop); darktable.points = (dt_points_t *)calloc(1, sizeof(dt_points_t)); dt_points_init(darktable.points, dt_get_num_threads()); // must come before mipmap_cache, because that one will need to access // image dimensions stored in here: darktable.image_cache = (dt_image_cache_t *)calloc(1, sizeof(dt_image_cache_t)); dt_image_cache_init(darktable.image_cache); darktable.mipmap_cache = (dt_mipmap_cache_t *)calloc(1, sizeof(dt_mipmap_cache_t)); dt_mipmap_cache_init(darktable.mipmap_cache); // The GUI must be initialized before the views, because the init() // functions of the views depend on darktable.control->accels_* to register // their keyboard accelerators if(init_gui) { darktable.gui = (dt_gui_gtk_t *)calloc(1, sizeof(dt_gui_gtk_t)); if(dt_gui_gtk_init(darktable.gui, argc, argv)) return 1; dt_bauhaus_init(); } else darktable.gui = NULL; darktable.view_manager = (dt_view_manager_t *)calloc(1, sizeof(dt_view_manager_t)); dt_view_manager_init(darktable.view_manager); darktable.imageio = (dt_imageio_t *)calloc(1, sizeof(dt_imageio_t)); 
dt_imageio_init(darktable.imageio); // load the darkroom mode plugins once: dt_iop_load_modules_so(); if(init_gui) { darktable.lib = (dt_lib_t *)calloc(1, sizeof(dt_lib_t)); dt_lib_init(darktable.lib); dt_control_load_config(darktable.control); } if(init_gui) { // Loading the keybindings char keyfile[PATH_MAX]; // First dump the default keymapping snprintf(keyfile, sizeof(keyfile), "%s/keyboardrc_default", datadir); gtk_accel_map_save(keyfile); // Removing extraneous semi-colons from the default keymap strip_semicolons_from_keymap(keyfile); // Then load any modified keys if available snprintf(keyfile, sizeof(keyfile), "%s/keyboardrc", datadir); if(g_file_test(keyfile, G_FILE_TEST_EXISTS)) gtk_accel_map_load(keyfile); else gtk_accel_map_save(keyfile); // Save the default keymap if none is present // I doubt that connecting to dbus for darktable-cli makes sense darktable.dbus = dt_dbus_init(); // initialize undo struct darktable.undo = dt_undo_init(); // load image(s) specified on cmdline int id = 0; if(images_to_load) { // If only one image is listed, attempt to load it in darkroom gboolean load_in_dr = (g_slist_next(images_to_load) == NULL); GSList *p = images_to_load; while (p != NULL) { // don't put these function calls into MAX(), the macro will evaluate // it twice (and happily deadlock, in this particular case) int newid = dt_load_from_string((gchar*)p->data, load_in_dr); id = MAX(id, newid); p = g_slist_next(p); } if (!load_in_dr || id == 0) dt_ctl_switch_mode_to(DT_LIBRARY); g_slist_free(images_to_load); } else dt_ctl_switch_mode_to(DT_LIBRARY); } if(darktable.unmuted & DT_DEBUG_MEMORY) { fprintf(stderr, "[memory] after successful startup\n"); dt_print_mem_usage(); } dt_image_local_copy_synch(); /* init lua last, since it's user made stuff it must be in the real environment */ #ifdef USE_LUA dt_lua_init(darktable.lua_state.state,lua_command); #endif // last but not least construct the popup that asks the user about images whose xmp files are newer than the 
db entry if(init_gui && changed_xmp_files) { dt_control_crawler_show_image_list(changed_xmp_files); } return 0; }
TutorialApplication::TutorialApplication (const std::string& tutorialName, int features) : Application(features), tutorialName(tutorialName), shader(SHADER_DEFAULT), width(512), height(512), pixels(nullptr), outputImageFilename(""), skipBenchmarkFrames(0), numBenchmarkFrames(0), numBenchmarkRepetitions(1), interactive(true), fullscreen(false), window_width(512), window_height(512), windowID(0), time0(getSeconds()), debug_int0(0), debug_int1(0), mouseMode(0), clickX(0), clickY(0), speed(1.0f), moveDelta(zero), command_line_camera(false), print_frame_rate(false), avg_render_time(64,1.0), avg_frame_time(64,1.0), avg_mrayps(64,1.0), print_camera(false), debug0(0), debug1(0), debug2(0), debug3(0), iflags_coherent(RTC_INTERSECT_COHERENT), iflags_incoherent(RTC_INTERSECT_INCOHERENT) { /* only a single instance of this class is supported */ assert(instance == nullptr); instance = this; /* for best performance set FTZ and DAZ flags in MXCSR control and status register */ _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); registerOption("c", [this] (Ref<ParseStream> cin, const FileName& path) { FileName file = path + cin->getFileName(); parseCommandLine(new ParseStream(new LineCommentFilter(file, "#")), file.path()); }, "-c <filename>: parses command line option from <filename>"); registerOption("o", [this] (Ref<ParseStream> cin, const FileName& path) { outputImageFilename = cin->getFileName(); interactive = false; }, "-o <filename>: output image filename"); /* camera settings */ registerOption("vp", [this] (Ref<ParseStream> cin, const FileName& path) { camera.from = cin->getVec3fa(); command_line_camera = true; }, "--vp <float> <float> <float>: camera position"); registerOption("vi", [this] (Ref<ParseStream> cin, const FileName& path) { camera.to = cin->getVec3fa(); command_line_camera = true; }, "--vi <float> <float> <float>: camera lookat position"); registerOption("vd", [this] (Ref<ParseStream> cin, const FileName& path) { 
camera.to = camera.from + cin->getVec3fa(); command_line_camera = true; }, "--vd <float> <float> <float>: camera direction vector"); registerOption("vu", [this] (Ref<ParseStream> cin, const FileName& path) { camera.up = cin->getVec3fa(); command_line_camera = true; }, "--vu <float> <float> <float>: camera up vector"); registerOption("fov", [this] (Ref<ParseStream> cin, const FileName& path) { camera.fov = cin->getFloat(); command_line_camera = true; }, "--fov <float>: vertical field of view"); /* framebuffer settings */ registerOption("size", [this] (Ref<ParseStream> cin, const FileName& path) { width = cin->getInt(); height = cin->getInt(); }, "--size <width> <height>: sets image size"); registerOption("fullscreen", [this] (Ref<ParseStream> cin, const FileName& path) { fullscreen = true; }, "--fullscreen: starts in fullscreen mode"); registerOption("benchmark", [this] (Ref<ParseStream> cin, const FileName& path) { skipBenchmarkFrames = cin->getInt(); numBenchmarkFrames = cin->getInt(); if (cin->peek() != "" && cin->peek()[0] != '-') numBenchmarkRepetitions = cin->getInt(); interactive = false; rtcore += ",benchmark=1,start_threads=1"; }, "--benchmark <N> <M> <R>: enabled benchmark mode, builds scene, skips N frames, renders M frames, and repeats this R times"); registerOption("nodisplay", [this] (Ref<ParseStream> cin, const FileName& path) { skipBenchmarkFrames = 0; numBenchmarkFrames = 2048; interactive = false; }, "--nodisplay: enabled benchmark mode, continously renders frames"); registerOption("print-frame-rate", [this] (Ref<ParseStream> cin, const FileName& path) { print_frame_rate = true; }, "--print-frame-rate: prints framerate for each frame on console"); registerOption("print-camera", [this] (Ref<ParseStream> cin, const FileName& path) { print_camera = true; }, "--print-camera: prints camera for each frame on console"); registerOption("debug0", [this] (Ref<ParseStream> cin, const FileName& path) { debug0 = cin->getInt(); }, "--debug0: sets internal 
debugging value"); registerOption("debug1", [this] (Ref<ParseStream> cin, const FileName& path) { debug1 = cin->getInt(); }, "--debug1: sets internal debugging value"); registerOption("debug2", [this] (Ref<ParseStream> cin, const FileName& path) { debug2 = cin->getInt(); }, "--debug2: sets internal debugging value"); registerOption("debug3", [this] (Ref<ParseStream> cin, const FileName& path) { debug3 = cin->getInt(); }, "--debug3: sets internal debugging value"); /* output filename */ registerOption("shader", [this] (Ref<ParseStream> cin, const FileName& path) { std::string mode = cin->getString(); if (mode == "default" ) shader = SHADER_DEFAULT; else if (mode == "eyelight") shader = SHADER_EYELIGHT; else if (mode == "occlusion") shader = SHADER_OCCLUSION; else if (mode == "uv" ) shader = SHADER_UV; else if (mode == "texcoords") shader = SHADER_TEXCOORDS; else if (mode == "texcoords-grid") shader = SHADER_TEXCOORDS_GRID; else if (mode == "Ng" ) shader = SHADER_NG; else if (mode == "cycles" ) { shader = SHADER_CYCLES; scale = cin->getFloat(); } else if (mode == "geomID" ) shader = SHADER_GEOMID; else if (mode == "primID" ) shader = SHADER_GEOMID_PRIMID; else if (mode == "ao" ) shader = SHADER_AMBIENT_OCCLUSION; else throw std::runtime_error("invalid shader:" +mode); }, "--shader <string>: sets shader to use at startup\n" " default: default tutorial shader\n" " eyelight: eyelight shading\n" " occlusion: occlusion shading\n" " uv: uv debug shader\n" " texcoords: texture coordinate debug shader\n" " texcoords-grid: grid texture debug shader\n" " Ng: visualization of shading normal\n" " cycles <float>: CPU cycle visualization\n" " geomID: visualization of geometry ID\n" " primID: visualization of geometry and primitive ID\n" " ao: ambient occlusion shader"); if (features & FEATURE_STREAM) { /* register parsing of stream mode */ registerOption("mode", [] (Ref<ParseStream> cin, const FileName& path) { std::string mode = cin->getString(); if (mode == "normal") g_mode = 
MODE_NORMAL; else if (mode == "stream") g_mode = MODE_STREAM; else throw std::runtime_error("invalid mode:" +mode); }, "--mode: sets rendering mode\n" " normal : normal mode\n" " stream : stream mode\n"); } registerOption("coherent", [this] (Ref<ParseStream> cin, const FileName& path) { g_iflags_coherent = iflags_coherent = RTC_INTERSECT_COHERENT; g_iflags_incoherent = iflags_incoherent = RTC_INTERSECT_COHERENT; }, "--coherent: force using RTC_INTERSECT_COHERENT hint when tracing rays"); registerOption("incoherent", [this] (Ref<ParseStream> cin, const FileName& path) { g_iflags_coherent = iflags_coherent = RTC_INTERSECT_INCOHERENT; g_iflags_incoherent = iflags_incoherent = RTC_INTERSECT_INCOHERENT; }, "--incoherent: force using RTC_INTERSECT_INCOHERENT hint when tracing rays"); }
int main() { printf("\n"); printf("\n"); printf("\n"); printf( " HPMPC -- Library for High-Performance implementation of solvers for " "MPC.\n"); printf( " Copyright (C) 2014-2015 by Technical University of Denmark. All " "rights reserved.\n"); printf("\n"); printf(" HPMPC is distributed in the hope that it will be useful,\n"); printf(" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"); printf(" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"); printf(" See the GNU Lesser General Public License for more details.\n"); printf("\n"); printf("\n"); printf("\n"); #if defined(TARGET_X64_INTEL_HASWELL) || \ defined(TARGET_X64_INTEL_SABDY_BRIDGE) || \ defined(TARGET_X64_INTEL_CORE) || defined(TARGET_X86_AMD_BULLDOZER) _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); // flush to zero subnormals !!! // works only with one thread // !!! #endif int ii, jj; int rep, nrep = NREP; int nx = 8; // number of states (it has to be even for the mass-spring // system test problem) int nu = 3; // number of inputs (controllers) (it has to be at least 1 and // at most nx/2 for the mass-spring system test problem) int N = 15; // horizon length int nb = 11; // number of box constrained inputs and states int ng = 0; // 4; // number of general constraints int ngN = 4; // 4; // number of general constraints at the last stage // int N2 = 3; // horizon length of partially condensed problem int nbu = nu < nb ? nu : nb; int nbx = nb - nu > 0 ? 
nb - nu : 0; // stage-wise variant size int nxx[N + 1]; #if defined(ELIMINATE_X0) nxx[0] = 0; #else nxx[0] = nx; #endif for (ii = 1; ii <= N; ii++) nxx[ii] = nx; int nuu[N + 1]; for (ii = 0; ii < N; ii++) nuu[ii] = nu; nuu[N] = 0; int nbb[N + 1]; #if defined(ELIMINATE_X0) nbb[0] = nbu; #else nbb[0] = nb; #endif for (ii = 1; ii < N; ii++) nbb[ii] = nb; nbb[N] = nbx; int ngg[N + 1]; for (ii = 0; ii < N; ii++) ngg[ii] = ng; ngg[N] = ngN; printf( " Test problem: mass-spring system with %d masses and %d controls.\n", nx / 2, nu); printf("\n"); printf( " MPC problem size: %d states, %d inputs, %d horizon length, %d " "two-sided box constraints, %d two-sided general constraints.\n", nx, nu, N, nb, ng); printf("\n"); printf( " IP method parameters: predictor-corrector IP, double precision, %d " "maximum iterations, %5.1e exit tolerance in duality measure.\n", MAXITER, TOL); printf("\n"); #if defined(TARGET_X64_AVX2) printf(" HPMPC built for the AVX2 architecture\n"); #endif #if defined(TARGET_X64_AVX) printf(" HPMPC built for the AVX architecture\n"); #endif printf("\n"); /************************************************ * dynamical system ************************************************/ // state space matrices & initial state double *A; d_zeros(&A, nx, nx); // states update matrix double *B; d_zeros(&B, nx, nu); // inputs matrix double *b; d_zeros(&b, nx, 1); // states offset double *x0; d_zeros(&x0, nx, 1); // initial state // mass-spring system double Ts = 0.5; // sampling time mass_spring_system(Ts, nx, nu, A, B, b, x0); for (jj = 0; jj < nx; jj++) b[jj] = 0.1; for (jj = 0; jj < nx; jj++) x0[jj] = 0; x0[0] = 2.5; x0[1] = 2.5; // d_print_mat(nx, nx, A, nx); // d_print_mat(nx, nu, B, nx); // d_print_mat(nx, 1, b, nx); // d_print_mat(nx, 1, x0, nx); #if defined(ELIMINATE_X0) // compute b0 = b + A*x0 double *b0; d_zeros(&b0, nx, 1); dcopy_3l(nx, b, 1, b0, 1); dgemv_n_3l(nx, nx, A, nx, x0, b0); // d_print_mat(nx, 1, b, nx); // d_print_mat(nx, 1, b0, nx); // then A0 is a 
matrix of size 0x0 double *A0; d_zeros(&A0, 0, 0); #endif /************************************************ * box constraints ************************************************/ int jj_end; int *idxb0; int_zeros(&idxb0, nbb[0], 1); double *lb0; d_zeros(&lb0, nbb[0], 1); double *ub0; d_zeros(&ub0, nbb[0], 1); #if defined(ELIMINATE_X0) for (jj = 0; jj < nbb[0]; jj++) { lb0[jj] = -0.5; // umin ub0[jj] = +0.5; // umin idxb0[jj] = jj; } #else jj_end = nbx < nbb[0] ? nbx : nbb[0]; for (jj = 0; jj < jj_end; jj++) { // lb0[jj] = x0[jj - nbu]; // initial state // ub0[jj] = x0[jj - nbu]; // initial state lb0[jj] = x0[jj]; // initial state ub0[jj] = x0[jj]; // initial state idxb0[jj] = jj; } for (; jj < nbb[0]; jj++) { lb0[jj] = -0.5; // umin ub0[jj] = +0.5; // umax idxb0[jj] = jj; } #endif // int_print_mat(nbb[0], 1, idxb0, nbb[0]); // d_print_mat(nbb[0], 1, lb0, nbb[0]); int *idxb1; int_zeros(&idxb1, nbb[1], 1); double *lb1; d_zeros(&lb1, nbb[1], 1); double *ub1; d_zeros(&ub1, nbb[1], 1); jj_end = nbx < nbb[1] ? nbx : nbb[1]; for (jj = 0; jj < jj_end; jj++) { lb1[jj] = -4.0; // xmin ub1[jj] = +4.0; // xmax idxb1[jj] = jj; } for (; jj < nbb[1]; jj++) { lb1[jj] = -0.5; // umin ub1[jj] = +0.5; // umax idxb1[jj] = jj; } // int_print_mat(nbb[1], 1, idxb1, nbb[1]); // d_print_mat(nbb[1], 1, lb1, nbb[1]); int *idxbN; int_zeros(&idxbN, nbb[N], 1); double *lbN; d_zeros(&lbN, nbb[N], 1); double *ubN; d_zeros(&ubN, nbb[N], 1); jj_end = nbx < nbb[N] ? 
nbx : nbb[N]; for (jj = 0; jj < jj_end; jj++) { lbN[jj] = -4.0; // xmin ubN[jj] = +4.0; // xmax idxbN[jj] = jj; } for (; jj < nbb[N]; jj++) { lbN[jj] = -0.5; // umin ubN[jj] = +0.5; // umax idxbN[jj] = jj; } // int_print_mat(nbb[N], 1, idxbN, nbb[N]); // d_print_mat(nbb[N], 1, lbN, nbb[N]); /************************************************ * general constraints ************************************************/ double *C; d_zeros(&C, ng, nx); double *D; d_zeros(&D, ng, nu); double *lg; d_zeros(&lg, ng, 1); double *ug; d_zeros(&ug, ng, 1); double *CN; d_zeros(&CN, ngN, nx); for (ii = 0; ii < ngN; ii++) CN[ii * (ngN + 1)] = 1.0; // d_print_mat(ngN, nx, CN, ngN); double *lgN; d_zeros(&lgN, ngN, 1); // force all states to 0 at the last stage double *ugN; d_zeros(&ugN, ngN, 1); // force all states to 0 at the last stage /************************************************ * cost function ************************************************/ double *Q; d_zeros(&Q, nx, nx); for (ii = 0; ii < nx; ii++) Q[ii * (nx + 1)] = 1.0; double *R; d_zeros(&R, nu, nu); for (ii = 0; ii < nu; ii++) R[ii * (nu + 1)] = 2.0; double *S; d_zeros(&S, nu, nx); double *q; d_zeros(&q, nx, 1); for (ii = 0; ii < nx; ii++) q[ii] = 0.1; double *r; d_zeros(&r, nu, 1); for (ii = 0; ii < nu; ii++) r[ii] = 0.2; #if defined(ELIMINATE_X0) // Q0 and q0 are matrices of size 0 double *Q0; d_zeros(&Q0, 0, 0); double *q0; d_zeros(&q0, 0, 1); // compute r0 = r + S*x0 double *r0; d_zeros(&r0, nu, 1); dcopy_3l(nu, r, 1, r0, 1); dgemv_n_3l(nu, nx, S, nu, x0, r0); // then S0 is a matrix of size nux0 double *S0; d_zeros(&S0, nu, 0); #endif /************************************************ * problems data ************************************************/ double *hA[N]; double *hB[N]; double *hb[N]; double *hQ[N + 1]; double *hS[N]; double *hR[N]; double *hq[N + 1]; double *hr[N]; double *hlb[N + 1]; double *hub[N + 1]; int *hidxb[N + 1]; double *hC[N + 1]; double *hD[N]; double *hlg[N + 1]; double *hug[N + 1]; #if 
defined(ELIMINATE_X0) hA[0] = A0; hb[0] = b0; hQ[0] = Q0; hS[0] = S0; hq[0] = q0; hr[0] = r0; #else hA[0] = A; hb[0] = b; hQ[0] = Q; hS[0] = S; hq[0] = q; hr[0] = r; #endif hB[0] = B; hR[0] = R; hlb[0] = lb0; hub[0] = ub0; hidxb[0] = idxb0; hC[0] = C; hD[0] = D; hlg[0] = lg; hug[0] = ug; for (ii = 1; ii < N; ii++) { hA[ii] = A; hB[ii] = B; hb[ii] = b; hQ[ii] = Q; hS[ii] = S; hR[ii] = R; hq[ii] = q; hr[ii] = r; hlb[ii] = lb1; hub[ii] = ub1; hidxb[ii] = idxb1; hC[ii] = C; hD[ii] = D; hlg[ii] = lg; hug[ii] = ug; } hQ[N] = Q; // or maybe initialize to the solution of the DARE??? hq[N] = q; // or maybe initialize to the solution of the DARE??? hlb[N] = lbN; hub[N] = ubN; hidxb[N] = idxbN; hC[N] = CN; hlg[N] = lgN; hug[N] = ugN; /************************************************ * solution ************************************************/ double *hx[N + 1]; double *hu[N]; double *hpi[N]; double *hlam[N + 1]; double *ht[N + 1]; for (ii = 0; ii < N; ii++) { d_zeros(&hx[ii], nxx[ii], 1); d_zeros(&hu[ii], nuu[ii], 1); d_zeros(&hpi[ii], nxx[ii + 1], 1); d_zeros(&hlam[ii], 2 * nbb[ii] + 2 * ngg[ii], 1); d_zeros(&ht[ii], 2 * nbb[ii] + 2 * ngg[ii], 1); } d_zeros(&hx[N], nxx[N], 1); d_zeros(&hlam[N], 2 * nbb[N] + 2 * ngg[N], 1); d_zeros(&ht[N], 2 * nbb[N] + 2 * ngg[N], 1); /************************************************ * create the in and out struct ************************************************/ ocp_qp_in qp_in; qp_in.N = N; qp_in.nx = (const int *)nxx; qp_in.nu = (const int *)nuu; qp_in.nb = (const int *)nbb; qp_in.nc = (const int *)ngg; qp_in.A = (const double **)hA; qp_in.B = (const double **)hB; qp_in.b = (const double **)hb; qp_in.Q = (const double **)hQ; qp_in.S = (const double **)hS; qp_in.R = (const double **)hR; qp_in.q = (const double **)hq; qp_in.r = (const double **)hr; qp_in.idxb = (const int **)hidxb; qp_in.lb = (const double **)hlb; qp_in.ub = (const double **)hub; qp_in.Cx = (const double **)hC; qp_in.Cu = (const double **)hD; qp_in.lc = (const double **)hlg; 
qp_in.uc = (const double **)hug; ocp_qp_out qp_out; qp_out.x = hx; qp_out.u = hu; qp_out.pi = hpi; qp_out.lam = hlam; qp_out.t = ht; // XXX why also the slack variables ??? /************************************************ * solver arguments (fully sparse) ************************************************/ // solver arguments ocp_qp_condensing_hpipm_args *hpipm_args = ocp_qp_condensing_hpipm_create_arguments(&qp_in); // hpipm_args->mu_max = TOL; // hpipm_args->iter_max = MAXITER; // hpipm_args->alpha_min = MINSTEP; hpipm_args->mu0 = 1.0; // 0.0 /************************************************ * work space (fully sparse) ************************************************/ int work_space_size = ocp_qp_condensing_hpipm_calculate_workspace_size(&qp_in, hpipm_args); printf("\nwork space size: %d bytes\n", work_space_size); void *workspace = malloc(work_space_size); // void *mem; // ocp_qp_hpipm_create_memory(&qp_in, hpipm_args, &mem); int memory_size = ocp_qp_condensing_hpipm_calculate_memory_size(&qp_in, hpipm_args); printf("\nmemory: %d bytes\n", memory_size); void *memory = malloc(memory_size); ocp_qp_condensing_hpipm_memory *hpipm_memory = ocp_qp_condensing_hpipm_create_memory(&qp_in, hpipm_args); /************************************************ * call the solver (fully sparse) ************************************************/ int return_value; acados_timer timer; acados_tic(&timer); // nrep = 1; for (rep = 0; rep < nrep; rep++) { // call the QP OCP solver // return_value = ocp_qp_hpipm(&qp_in, &qp_out, hpipm_args, // workspace); return_value = ocp_qp_condensing_hpipm(&qp_in, &qp_out, hpipm_args, hpipm_memory, workspace); } real_t time = acados_toc(&timer)/nrep; if (return_value == ACADOS_SUCCESS) printf("\nACADOS status: solution found in %d iterations\n", hpipm_memory->iter); if (return_value == ACADOS_MAXITER) printf("\nACADOS status: maximum number of iterations reached\n"); if (return_value == ACADOS_MINSTEP) printf("\nACADOS status: below minimum step size 
length\n"); printf("\nu = \n"); for (ii = 0; ii < N; ii++) d_print_mat(1, nuu[ii], hu[ii], 1); printf("\nx = \n"); for (ii = 0; ii <= N; ii++) d_print_mat(1, nxx[ii], hx[ii], 1); printf("\npi = \n"); for (ii = 0; ii < N; ii++) d_print_mat(1, nxx[ii+1], hpi[ii], 1); printf("\nlam = \n"); for (ii = 0; ii <= N; ii++) d_print_mat(1, 2*nbb[ii]+2*ngg[ii], hlam[ii], 1); printf("\n"); printf(" inf norm res: %e, %e, %e, %e, %e\n", hpipm_memory->inf_norm_res[0], hpipm_memory->inf_norm_res[1], hpipm_memory->inf_norm_res[2], hpipm_memory->inf_norm_res[3], hpipm_memory->inf_norm_res[4]); printf("\n"); printf( " Solution time for %d IPM iterations, averaged over %d runs: %5.2e " "seconds\n", hpipm_memory->iter, nrep, time); printf("\n\n"); /************************************************ * free memory ************************************************/ d_free(A); d_free(B); d_free(b); d_free(x0); d_free(Q); d_free(S); d_free(R); d_free(q); d_free(r); #if defined(ELIMINATE_X0) d_free(A0); d_free(b0); d_free(Q0); d_free(S0); d_free(q0); d_free(r0); #endif int_free(idxb0); d_free(lb0); d_free(ub0); int_free(idxb1); d_free(lb1); d_free(ub1); int_free(idxbN); d_free(lbN); d_free(ubN); d_free(C); d_free(D); d_free(lg); d_free(ug); d_free(CN); d_free(lgN); d_free(ugN); for (ii = 0; ii < N; ii++) { d_free(hx[ii]); d_free(hu[ii]); d_free(hpi[ii]); d_free(hlam[ii]); d_free(ht[ii]); } d_free(hx[N]); d_free(hlam[N]); d_free(ht[N]); free(workspace); free(memory); return 0; }