void FT_NLP() { int numprocs,myid,ID,tag=999; int count,NumSpe; int i,kj,num_k,so; int Lspe,spe,L,GL,MaxGL; double dk,norm_k; double rmin,rmax,r,r2,h,sum[2]; double **SphB; double *tmp_SphB,*tmp_SphBp; double TStime, TEtime; /* for MPI */ MPI_Status stat; MPI_Request request; /* for OpenMP */ int OMPID,Nthrds,Nprocs; dtime(&TStime); /* MPI */ MPI_Comm_size(mpi_comm_level1,&numprocs); MPI_Comm_rank(mpi_comm_level1,&myid); if (myid==Host_ID) printf("<FT_NLP> Fourier transform of non-local projectors\n"); for (Lspe=0; Lspe<MSpeciesNum; Lspe++){ spe = Species_Top[myid] + Lspe; num_k = Ngrid_NormK; dk = PAO_Nkmax/(double)num_k; rmin = Spe_VPS_RV[spe][0]; rmax = Spe_Atom_Cut1[spe] + 0.5; h = (rmax - rmin)/(double)OneD_Grid; /* kj loop */ #pragma omp parallel shared(Spe_VPS_List,spe,Spe_Num_RVPS,num_k,dk,OneD_Grid,rmin,h,VPS_j_dependency,Spe_NLRF_Bessel) private(MaxGL,L,GL,SphB,tmp_SphB,tmp_SphBp,OMPID,Nthrds,Nprocs,norm_k,i,r,r2,sum,so,kj) { /* allocate SphB */ MaxGL = -1; for (L=1; L<=Spe_Num_RVPS[spe]; L++){ GL = Spe_VPS_List[spe][L]; if (MaxGL<GL) MaxGL = GL; } SphB = (double**)malloc(sizeof(double*)*(MaxGL+3)); for(GL=0; GL<(MaxGL+3); GL++){ SphB[GL] = (double*)malloc(sizeof(double)*(OneD_Grid+1)); } tmp_SphB = (double*)malloc(sizeof(double)*(MaxGL+3)); tmp_SphBp = (double*)malloc(sizeof(double)*(MaxGL+3)); /* get info. 
on OpenMP */ OMPID = omp_get_thread_num(); Nthrds = omp_get_num_threads(); Nprocs = omp_get_num_procs(); for ( kj=OMPID; kj<num_k; kj+=Nthrds ){ norm_k = (double)kj*dk; /* calculate SphB */ for (i=0; i<=OneD_Grid; i++){ r = rmin + (double)i*h; Spherical_Bessel(norm_k*r,MaxGL,tmp_SphB,tmp_SphBp); r2 = r*r; for(GL=0; GL<=MaxGL; GL++){ SphB[GL][i] = tmp_SphB[GL]*r2; } } for(GL=0; GL<=MaxGL; GL++){ SphB[GL][0] *= 0.5; SphB[GL][OneD_Grid] *= 0.5; } /* loof for L */ for (L=1; L<=Spe_Num_RVPS[spe]; L++){ GL = Spe_VPS_List[spe][L]; /**************************************************** \int jL(k*r)*RL*r^2 dr ****************************************************/ sum[0] = 0.0; sum[1] = 0.0; for (i=0; i<=OneD_Grid; i++){ r = rmin + (double)i*h; for (so=0; so<=VPS_j_dependency[spe]; so++){ sum[so] += Nonlocal_RadialF(spe,L-1,so,r)*SphB[GL][i]; } } for (so=0; so<=VPS_j_dependency[spe]; so++){ Spe_NLRF_Bessel[so][spe][L][kj] = sum[so]*h; } } /* L */ } /* kj */ /* free arrays */ for(GL=0; GL<(MaxGL+3); GL++){ free(SphB[GL]); } free(SphB); free(tmp_SphB); free(tmp_SphBp); #pragma omp flush(Spe_NLRF_Bessel) } /* #pragma omp parallel */ } /* Lspe */ /**************************************************** Remedy for MSpeciesNum==0 generate radial grids in the k-space ****************************************************/ dk = PAO_Nkmax/(double)Ngrid_NormK; for (i=0; i<Ngrid_NormK; i++){ NormK[i] = (double)i*dk; } /*********************************************************** sending and receiving of Spe_RF_Bessel by MPI ***********************************************************/ for (ID=0; ID<Num_Procs2; ID++){ NumSpe = Species_End[ID] - Species_Top[ID] + 1; for (Lspe=0; Lspe<NumSpe; Lspe++){ spe = Species_Top[ID] + Lspe; for (so=0; so<=VPS_j_dependency[spe]; so++){ for (L=1; L<=Spe_Num_RVPS[spe]; L++){ MPI_Bcast(&Spe_NLRF_Bessel[so][spe][L][0], List_YOUSO[15],MPI_DOUBLE,ID,mpi_comm_level1); } } } } /*********************************************************** elapsed time 
***********************************************************/ dtime(&TEtime); /* printf("myid=%2d Elapsed Time (s) = %15.12f\n",myid,TEtime-TStime); MPI_Finalize(); exit(0); */ }
/*
 * fitGemRecomb
 *
 * Fits the rate constants ka and kd of the reversible geminate
 * recombination model to the ACF in ct using a GSL simplex minimizer.
 *
 *   ct, time  the data and its time axis (nData points)
 *   ctFit     on success receives the theoretical ACF evaluated with the
 *             final parameters (nData points, allocated here with snew;
 *             ownership passes to the caller)
 *   params    fit settings; params->ka and params->kd are updated in every
 *             iteration, params->nFitPoints may be reduced if the
 *             logarithmic index would run past nData
 *
 * Returns the minimum reported by the minimizer, or -1 when GSL is not
 * available or params->D is not positive.
 *
 * Side effect: writes one "Iter_<n>.xvg" dump per iteration via dumpN().
 */
extern real fitGemRecomb(double *ct, double *time, double **ctFit,
                         const int nData, t_gemParams *params)
{
    int         nThreads, i, iter, status, maxiter;
    real        size, d2, tol, *dumpdata;
    size_t      n;
    gemFitData *GD;
    char        dumpname[128];

    /* nmsimplex2 had convergence problems prior to gsl v1.14,
     * but it's O(N) instead of O(N^2) operations, so let's use it if v >= 1.14 */
#ifdef HAVE_LIBGSL
    gsl_multimin_fminimizer *s;
    gsl_vector              *x, *dx; /* parameters and initial step size */
    gsl_multimin_function    fitFunc;
    /* A single condition, so that T is declared for every combination of
     * defined/undefined version macros. */
#if defined(GSL_MAJOR_VERSION) && defined(GSL_MINOR_VERSION) && \
    ((GSL_MAJOR_VERSION == 1 && GSL_MINOR_VERSION >= 14) || \
     (GSL_MAJOR_VERSION > 1))
    const gsl_multimin_fminimizer_type *T = gsl_multimin_fminimizer_nmsimplex2;
#else
    const gsl_multimin_fminimizer_type *T = gsl_multimin_fminimizer_nmsimplex;
#endif
    fprintf(stdout, "Will fit ka and kd to the ACF according to the reversible geminate recombination model.\n");
#else  /* HAVE_LIBGSL */
    fprintf(stderr, "Sorry, can't do reversible geminate recombination without gsl. "
            "Recompile using --with-gsl.\n");
    return -1;
#endif /* HAVE_LIBGSL */

#ifdef HAVE_LIBGSL
#ifdef HAVE_OPENMP
    nThreads = omp_get_num_procs();
    omp_set_num_threads(nThreads);
    fprintf(stdout, "We will be using %i threads.\n", nThreads);
#endif

    iter    = 0;
    status  = 0;
    maxiter = 100;
    tol     = 1e-10;

    n = params->nFitPoints; /* params->nLin*2 */  /* Number of points in the reduced dataset  */

    if (params->D <= 0)
    {
        fprintf(stderr, "Fitting of D is not implemented yet. It must be provided on the command line.\n");
        return -1;
    }

    /*   if (nData<n) { */
    /*     fprintf(stderr, "Reduced data set larger than the complete data set!\n"); */
    /*     n=nData; */
    /*   } */
    snew(dumpdata, nData);
    snew(GD, 1);

    GD->n        = n;
    GD->y        = ct;
    GD->ctTheory = NULL;
    snew(GD->ctTheory, nData);
    GD->LinLog = NULL;
    snew(GD->LinLog, n);
    GD->time   = time;
    GD->ka     = 0;
    GD->kd     = 0;
    GD->tDelta = time[1]-time[0];
    GD->nData  = nData;
    GD->params = params;
    snew(GD->logtime, params->nFitPoints);
    snew(GD->doubleLogTime, params->nFitPoints);

    for (i = 0; i < params->nFitPoints; i++)
    {
        GD->doubleLogTime[i]  = (double)(getLogIndex(i, params));
        GD->logtime[i]        = (int)(GD->doubleLogTime[i]);
        GD->doubleLogTime[i] *= GD->tDelta;

        if (GD->logtime[i] >= nData)
        {
            fprintf(stderr, "Ayay. It seems we're indexing out of bounds.\n");
            /* Shrinking nFitPoints also ends this loop. */
            params->nFitPoints = i;
        }
    }

    /* Two fit parameters: ka and kd. */
    fitFunc.f      = &gemFunc_residual2;
    fitFunc.n      = 2;
    fitFunc.params = (void*)GD;

    x  = gsl_vector_alloc (fitFunc.n);
    dx = gsl_vector_alloc (fitFunc.n);
    gsl_vector_set (x,  0, 25);
    gsl_vector_set (x,  1, 0.5);
    gsl_vector_set (dx, 0, 0.1);
    gsl_vector_set (dx, 1, 0.01);

    s = gsl_multimin_fminimizer_alloc (T, fitFunc.n);
    gsl_multimin_fminimizer_set (s, &fitFunc, x, dx);
    gsl_vector_free (x);
    gsl_vector_free (dx);

    do
    {
        iter++;
        status = gsl_multimin_fminimizer_iterate (s);

        if (status != 0)
        {
            gmx_fatal(FARGS, "Something went wrong in the iteration in minimizer %s:\n \"%s\"\n",
                      gsl_multimin_fminimizer_name(s), gsl_strerror(status));
        }

        d2   = gsl_multimin_fminimizer_minimum(s);
        size = gsl_multimin_fminimizer_size(s);

        params->ka = gsl_vector_get (s->x, 0);
        params->kd = gsl_vector_get (s->x, 1);

        if (status)
        {
            fprintf(stderr, "%s\n", gsl_strerror(status));
            break;
        }

        status = gsl_multimin_test_size(size, tol);

        if (status == GSL_SUCCESS)
        {
            fprintf(stdout, "Converged to minimum at\n");
        }

        printf ("iter %5d: ka = %2.5f kd = %2.5f f() = %7.3f size = %.3f chi2 = %2.5f\n",
                iter,
                params->ka,
                params->kd,
                s->fval, size, d2);

        /* Dump the current theoretical ACF after every iteration. */
        eq10v2(GD->ctTheory, time, nData, params->ka, params->kd, params);
        /* fixGemACF(GD->ctTheory, nFitPoints); */
        sprintf(dumpname, "Iter_%i.xvg", iter);
        for (i = 0; i < GD->nData; i++)
        {
            dumpdata[i] = (real)(GD->ctTheory[i]);
            if (!gmx_isfinite(dumpdata[i]))
            {
                gmx_fatal(FARGS, "Non-finite value in acf.");
            }
        }
        dumpN(dumpdata, GD->nData, dumpname);
    }
    while ((status == GSL_CONTINUE) && (iter < maxiter));

    /* Calculate the theoretical ACF from the parameters one last time. */
    eq10v2(GD->ctTheory, time, nData, params->ka, params->kd, params);
    *ctFit = GD->ctTheory; /* handed over to the caller, must not be freed here */

    gsl_multimin_fminimizer_free (s);

    /* Release everything this function allocated except ctTheory. */
    sfree(dumpdata);
    sfree(GD->LinLog);
    sfree(GD->logtime);
    sfree(GD->doubleLogTime);
    sfree(GD);

    return d2;

#endif /* HAVE_LIBGSL */
}
/// Returns the processor count reported by the OpenMP runtime
/// (omp_get_num_procs), converted to unsigned int.
unsigned int YsThreadController::GetNumCPU(void)
{
	const int nProc = omp_get_num_procs();
	return static_cast<unsigned int>(nProc);
}
int dt_init(int argc, char *argv[], const int init_gui) { #ifndef __WIN32__ if(getuid() == 0 || geteuid() == 0) printf("WARNING: either your user id or the effective user id are 0. are you running darktable as root?\n"); #endif // make everything go a lot faster. _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); #if !defined __APPLE__ && !defined __WIN32__ _dt_sigsegv_old_handler = signal(SIGSEGV,&_dt_sigsegv_handler); #endif #ifndef __GNUC_PREREQ // on OSX, gcc-4.6 and clang chokes if this is not here. #if defined __GNUC__ && defined __GNUC_MINOR__ # define __GNUC_PREREQ(maj, min) \ ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) #else # define __GNUC_PREREQ(maj, min) 0 #endif #endif #ifndef __has_builtin // http://clang.llvm.org/docs/LanguageExtensions.html#feature-checking-macros #define __has_builtin(x) false #endif #ifndef __SSE3__ #error "Unfortunately we depend on SSE3 instructions at this time." #error "Please contribute a backport patch (or buy a newer processor)." #else #if (__GNUC_PREREQ(4,8) || __has_builtin(__builtin_cpu_supports)) //FIXME: check will work only in GCC 4.8+ !!! implement manual cpuid check !!! //NOTE: _may_i_use_cpu_feature() looks better, but only avaliable in ICC if (!__builtin_cpu_supports("sse3")) { fprintf(stderr, "[dt_init] unfortunately we depend on SSE3 instructions at this time.\n"); fprintf(stderr, "[dt_init] please contribute a backport patch (or buy a newer processor).\n"); return 1; } #else //FIXME: no way to check for SSE3 in runtime, implement manual cpuid check !!! 
#endif #endif #ifdef M_MMAP_THRESHOLD mallopt(M_MMAP_THRESHOLD,128*1024) ; /* use mmap() for large allocations */ #endif // we have to have our share dir in XDG_DATA_DIRS, // otherwise GTK+ won't find our logo for the about screen (and maybe other things) { const gchar *xdg_data_dirs = g_getenv("XDG_DATA_DIRS"); gchar *new_xdg_data_dirs = NULL; gboolean set_env = TRUE; if(xdg_data_dirs != NULL && *xdg_data_dirs != '\0') { // check if DARKTABLE_SHAREDIR is already in there gboolean found = FALSE; gchar **tokens = g_strsplit(xdg_data_dirs, ":", 0); // xdg_data_dirs is neither NULL nor empty => tokens != NULL for(char **iter = tokens; *iter != NULL; iter++) if(!strcmp(DARKTABLE_SHAREDIR, *iter)) { found = TRUE; break; } g_strfreev(tokens); if(found) set_env = FALSE; else new_xdg_data_dirs = g_strjoin(":", DARKTABLE_SHAREDIR, xdg_data_dirs, NULL); } else new_xdg_data_dirs = g_strdup(DARKTABLE_SHAREDIR); if(set_env) g_setenv("XDG_DATA_DIRS", new_xdg_data_dirs, 1); g_free(new_xdg_data_dirs); } setlocale(LC_ALL, ""); bindtextdomain (GETTEXT_PACKAGE, DARKTABLE_LOCALEDIR); bind_textdomain_codeset (GETTEXT_PACKAGE, "UTF-8"); textdomain (GETTEXT_PACKAGE); // init all pointers to 0: memset(&darktable, 0, sizeof(darktable_t)); darktable.progname = argv[0]; // database gchar *dbfilename_from_command = NULL; char *datadir_from_command = NULL; char *moduledir_from_command = NULL; char *tmpdir_from_command = NULL; char *configdir_from_command = NULL; char *cachedir_from_command = NULL; darktable.num_openmp_threads = 1; #ifdef _OPENMP darktable.num_openmp_threads = omp_get_num_procs(); #endif darktable.unmuted = 0; GSList *images_to_load = NULL, *config_override = NULL; for(int k=1; k<argc; k++) { if(argv[k][0] == '-') { if(!strcmp(argv[k], "--help")) { return usage(argv[0]); } if(!strcmp(argv[k], "-h")) { return usage(argv[0]); } else if(!strcmp(argv[k], "--version")) { printf("this is "PACKAGE_STRING"\ncopyright (c) 2009-2014 johannes hanika\n"PACKAGE_BUGREPORT"\n" #ifdef _OPENMP 
"OpenMP support enabled\n" #else "OpenMP support disabled\n" #endif ); return 1; } else if(!strcmp(argv[k], "--library")) { dbfilename_from_command = argv[++k]; } else if(!strcmp(argv[k], "--datadir")) { datadir_from_command = argv[++k]; } else if(!strcmp(argv[k], "--moduledir")) { moduledir_from_command = argv[++k]; } else if(!strcmp(argv[k], "--tmpdir")) { tmpdir_from_command = argv[++k]; } else if(!strcmp(argv[k], "--configdir")) { configdir_from_command = argv[++k]; } else if(!strcmp(argv[k], "--cachedir")) { cachedir_from_command = argv[++k]; } else if(!strcmp(argv[k], "--localedir")) { bindtextdomain (GETTEXT_PACKAGE, argv[++k]); } else if(argv[k][1] == 'd' && argc > k+1) { if(!strcmp(argv[k+1], "all")) darktable.unmuted = 0xffffffff; // enable all debug information else if(!strcmp(argv[k+1], "cache")) darktable.unmuted |= DT_DEBUG_CACHE; // enable debugging for lib/film/cache module else if(!strcmp(argv[k+1], "control")) darktable.unmuted |= DT_DEBUG_CONTROL; // enable debugging for scheduler module else if(!strcmp(argv[k+1], "dev")) darktable.unmuted |= DT_DEBUG_DEV; // develop module else if(!strcmp(argv[k+1], "fswatch")) darktable.unmuted |= DT_DEBUG_FSWATCH; // fswatch module else if(!strcmp(argv[k+1], "input")) darktable.unmuted |= DT_DEBUG_INPUT; // input devices else if(!strcmp(argv[k+1], "camctl")) darktable.unmuted |= DT_DEBUG_CAMCTL; // camera control module else if(!strcmp(argv[k+1], "perf")) darktable.unmuted |= DT_DEBUG_PERF; // performance measurements else if(!strcmp(argv[k+1], "pwstorage")) darktable.unmuted |= DT_DEBUG_PWSTORAGE; // pwstorage module else if(!strcmp(argv[k+1], "opencl")) darktable.unmuted |= DT_DEBUG_OPENCL; // gpu accel via opencl else if(!strcmp(argv[k+1], "sql")) darktable.unmuted |= DT_DEBUG_SQL; // SQLite3 queries else if(!strcmp(argv[k+1], "memory")) darktable.unmuted |= DT_DEBUG_MEMORY; // some stats on mem usage now and then. 
else if(!strcmp(argv[k+1], "lighttable")) darktable.unmuted |= DT_DEBUG_LIGHTTABLE; // lighttable related stuff. else if(!strcmp(argv[k+1], "nan")) darktable.unmuted |= DT_DEBUG_NAN; // check for NANs when processing the pipe. else if(!strcmp(argv[k+1], "masks")) darktable.unmuted |= DT_DEBUG_MASKS; // masks related stuff. else if(!strcmp(argv[k+1], "lua")) darktable.unmuted |= DT_DEBUG_LUA; // lua errors are reported on console else return usage(argv[0]); k ++; } else if(argv[k][1] == 't' && argc > k+1) { darktable.num_openmp_threads = CLAMP(atol(argv[k+1]), 1, 100); printf("[dt_init] using %d threads for openmp parallel sections\n", darktable.num_openmp_threads); k ++; } else if(!strcmp(argv[k], "--conf")) { gchar *keyval = g_strdup(argv[++k]), *c = keyval; while(*c != '=' && c < keyval + strlen(keyval)) c++; if(*c == '=' && *(c+1) != '\0') { *c++ = '\0'; dt_conf_string_entry_t *entry = (dt_conf_string_entry_t*)g_malloc(sizeof(dt_conf_string_entry_t)); entry->key = g_strdup(keyval); entry->value = g_strdup(c); config_override = g_slist_append(config_override, entry); } g_free(keyval); } } #ifndef MAC_INTEGRATION else { images_to_load = g_slist_append(images_to_load, argv[k]); } #endif } if(darktable.unmuted & DT_DEBUG_MEMORY) { fprintf(stderr, "[memory] at startup\n"); dt_print_mem_usage(); } #ifdef _OPENMP omp_set_num_threads(darktable.num_openmp_threads); #endif dt_loc_init_datadir(datadir_from_command); dt_loc_init_plugindir(moduledir_from_command); if(dt_loc_init_tmp_dir(tmpdir_from_command)) { printf(_("ERROR : invalid temporary directory : %s\n"),darktable.tmpdir); return usage(argv[0]); } dt_loc_init_user_config_dir(configdir_from_command); dt_loc_init_user_cache_dir(cachedir_from_command); #if !GLIB_CHECK_VERSION(2, 35, 0) g_type_init(); #endif // does not work, as gtk is not inited yet. // even if it were, it's a super bad idea to invoke gtk stuff from // a signal handler. 
/* check cput caps */ // dt_check_cpu(argc,argv); #ifdef HAVE_GEGL char geglpath[PATH_MAX]; char datadir[PATH_MAX]; dt_loc_get_datadir(datadir, sizeof(datadir)); snprintf(geglpath, sizeof(geglpath), "%s/gegl:/usr/lib/gegl-0.0", datadir); (void)setenv("GEGL_PATH", geglpath, 1); gegl_init(&argc, &argv); #endif #ifdef USE_LUA dt_lua_init_early(NULL); #endif // thread-safe init: dt_exif_init(); char datadir[PATH_MAX]; dt_loc_get_user_config_dir (datadir, sizeof(datadir)); char filename[PATH_MAX]; snprintf(filename, sizeof(filename), "%s/darktablerc", datadir); // initialize the config backend. this needs to be done first... darktable.conf = (dt_conf_t *)calloc(1, sizeof(dt_conf_t)); dt_conf_init(darktable.conf, filename, config_override); g_slist_free_full(config_override, g_free); // set the interface language const gchar* lang = dt_conf_get_string("ui_last/gui_language"); // we may not g_free 'lang' since it is owned by setlocale afterwards if(lang != NULL && lang[0] != '\0') { if(setlocale(LC_ALL, lang) != NULL) gtk_disable_setlocale(); } // initialize the database darktable.db = dt_database_init(dbfilename_from_command); if(darktable.db == NULL) { printf("ERROR : cannot open database\n"); return 1; } else if(!dt_database_get_lock_acquired(darktable.db)) { // send the images to the other instance via dbus if(images_to_load) { GSList *p = images_to_load; // get a connection! GDBusConnection *connection = g_bus_get_sync(G_BUS_TYPE_SESSION,NULL, NULL); while (p != NULL) { // make the filename absolute ... gchar *filename = dt_make_path_absolute((gchar*)p->data); if(filename == NULL) continue; // ... 
and send it to the running instance of darktable g_dbus_connection_call_sync(connection, "org.darktable.service", "/darktable", "org.darktable.service.Remote", "Open", g_variant_new ("(s)", filename), NULL, G_DBUS_CALL_FLAGS_NONE, -1, NULL, NULL); p = g_slist_next(p); g_free(filename); } g_slist_free(images_to_load); g_object_unref(connection); } return 1; } // Initialize the signal system darktable.signals = dt_control_signal_init(); // Make sure that the database and xmp files are in sync before starting the fswatch. // We need conf and db to be up and running for that which is the case here. // FIXME: is this also useful in non-gui mode? GList *changed_xmp_files = NULL; if(init_gui && dt_conf_get_bool("run_crawler_on_start")) { changed_xmp_files = dt_control_crawler_run(); } // Initialize the filesystem watcher darktable.fswatch=dt_fswatch_new(); #ifdef HAVE_GPHOTO2 // Initialize the camera control darktable.camctl=dt_camctl_new(); #endif // get max lighttable thumbnail size: darktable.thumbnail_width = CLAMPS(dt_conf_get_int("plugins/lighttable/thumbnail_width"), 200, 3000); darktable.thumbnail_height = CLAMPS(dt_conf_get_int("plugins/lighttable/thumbnail_height"), 200, 3000); // and make sure it can be mip-mapped all the way from mip4 to mip0 darktable.thumbnail_width /= 16; darktable.thumbnail_width *= 16; darktable.thumbnail_height /= 16; darktable.thumbnail_height *= 16; // Initialize the password storage engine darktable.pwstorage=dt_pwstorage_new(); // FIXME: move there into dt_database_t dt_pthread_mutex_init(&(darktable.db_insert), NULL); dt_pthread_mutex_init(&(darktable.plugin_threadsafe), NULL); dt_pthread_mutex_init(&(darktable.capabilities_threadsafe), NULL); darktable.control = (dt_control_t *)calloc(1, sizeof(dt_control_t)); if(init_gui) { dt_control_init(darktable.control); } else { if(dbfilename_from_command && !strcmp(dbfilename_from_command, ":memory:")) dt_gui_presets_init(); // init preset db schema. 
darktable.control->running = 0; darktable.control->accelerators = NULL; dt_pthread_mutex_init(&darktable.control->run_mutex, NULL); } // initialize collection query darktable.collection_listeners = NULL; darktable.collection = dt_collection_new(NULL); /* initialize selection */ darktable.selection = dt_selection_new(); /* capabilities set to NULL */ darktable.capabilities = NULL; #ifdef HAVE_GRAPHICSMAGICK /* GraphicsMagick init */ InitializeMagick(darktable.progname); #endif darktable.opencl = (dt_opencl_t *)calloc(1, sizeof(dt_opencl_t)); #ifdef HAVE_OPENCL dt_opencl_init(darktable.opencl, argc, argv); #endif darktable.blendop = (dt_blendop_t *)calloc(1, sizeof(dt_blendop_t)); dt_develop_blend_init(darktable.blendop); darktable.points = (dt_points_t *)calloc(1, sizeof(dt_points_t)); dt_points_init(darktable.points, dt_get_num_threads()); // must come before mipmap_cache, because that one will need to access // image dimensions stored in here: darktable.image_cache = (dt_image_cache_t *)calloc(1, sizeof(dt_image_cache_t)); dt_image_cache_init(darktable.image_cache); darktable.mipmap_cache = (dt_mipmap_cache_t *)calloc(1, sizeof(dt_mipmap_cache_t)); dt_mipmap_cache_init(darktable.mipmap_cache); // The GUI must be initialized before the views, because the init() // functions of the views depend on darktable.control->accels_* to register // their keyboard accelerators if(init_gui) { darktable.gui = (dt_gui_gtk_t *)calloc(1, sizeof(dt_gui_gtk_t)); if(dt_gui_gtk_init(darktable.gui, argc, argv)) return 1; dt_bauhaus_init(); } else darktable.gui = NULL; darktable.view_manager = (dt_view_manager_t *)calloc(1, sizeof(dt_view_manager_t)); dt_view_manager_init(darktable.view_manager); // load the darkroom mode plugins once: dt_iop_load_modules_so(); if(init_gui) { darktable.lib = (dt_lib_t *)calloc(1, sizeof(dt_lib_t)); dt_lib_init(darktable.lib); dt_control_load_config(darktable.control); } darktable.imageio = (dt_imageio_t *)calloc(1, sizeof(dt_imageio_t)); 
dt_imageio_init(darktable.imageio); if(init_gui) { // Loading the keybindings char keyfile[PATH_MAX]; // First dump the default keymapping snprintf(keyfile, sizeof(keyfile), "%s/keyboardrc_default", datadir); gtk_accel_map_save(keyfile); // Removing extraneous semi-colons from the default keymap strip_semicolons_from_keymap(keyfile); // Then load any modified keys if available snprintf(keyfile, sizeof(keyfile), "%s/keyboardrc", datadir); if(g_file_test(keyfile, G_FILE_TEST_EXISTS)) gtk_accel_map_load(keyfile); else gtk_accel_map_save(keyfile); // Save the default keymap if none is present // I doubt that connecting to dbus for darktable-cli makes sense darktable.dbus = dt_dbus_init(); // initialize undo struct darktable.undo = dt_undo_init(); // load image(s) specified on cmdline int id = 0; if(images_to_load) { // If only one image is listed, attempt to load it in darkroom gboolean load_in_dr = (g_slist_next(images_to_load) == NULL); GSList *p = images_to_load; while (p != NULL) { // don't put these function calls into MAX(), the macro will evaluate // it twice (and happily deadlock, in this particular case) int newid = dt_load_from_string((gchar*)p->data, load_in_dr); id = MAX(id, newid); p = g_slist_next(p); } if (!load_in_dr || id == 0) dt_ctl_switch_mode_to(DT_LIBRARY); g_slist_free(images_to_load); } else dt_ctl_switch_mode_to(DT_LIBRARY); } if(darktable.unmuted & DT_DEBUG_MEMORY) { fprintf(stderr, "[memory] after successful startup\n"); dt_print_mem_usage(); } dt_image_local_copy_synch(); /* init lua last, since it's user made stuff it must be in the real environment */ #ifdef USE_LUA dt_lua_init(darktable.lua_state.state,init_gui); #endif // last but not least construct the popup that asks the user about images whose xmp files are newer than the db entry if(init_gui && changed_xmp_files) { dt_control_crawler_show_image_list(changed_xmp_files); } return 0; }
/// Inverts the bundled sample picture.
///
/// The optional first command-line argument is the thread count handed to
/// ImageInverter::run().  When it is absent, or does not parse to a value
/// of at least 1 (atoi yields 0 for non-numeric text), the processor count
/// reported by OpenMP is used instead.
int main(int argc, char* argv[])
{
	int t = (argc > 1) ? atoi(argv[1]) : 0;
	if (t < 1)
	{
		// covers "no argument", "not a number", zero and negative values
		t = omp_get_num_procs();
	}

	ImageInverter ii("../assets/pics/Car-colors.jpg", 1022, 1024);
	ii.run(t);
	return 0;
}
int main ( int argc, char *argv[] )

/******************************************************************************/
/*
  Purpose:

    HELLO has each thread print out its ID.

  Discussion:

    HELLO is a "Hello, World" program for OpenMP.  It reports the processor
    and thread counts, prints the thread number seen outside the parallel
    region, lets every thread of one parallel region print its own number,
    and finally reports the elapsed wall clock time.

  Licensing:

    This code is distributed under the GNU LGPL license.

  Modified:

    23 June 2010

  Author:

    John Burkardt
*/
{
  int outer_id;
  double t_begin;
  double t_elapsed;

  printf ( "\n" );
  printf ( "HELLO_OPENMP\n" );
  printf ( " C/OpenMP version\n" );

  printf ( "\n" );
  printf ( " Number of processors available = %d\n", omp_get_num_procs ( ) );
  printf ( " Number of threads = %d\n", omp_get_max_threads ( ) );

  t_begin = omp_get_wtime ( );

  printf ( "\n" );
  printf ( " OUTSIDE the parallel region.\n" );
  printf ( "\n" );

  outer_id = omp_get_thread_num ( );
  printf ( " HELLO from process %d\n", outer_id ) ;

  printf ( "\n" );
  printf ( " Going INSIDE the parallel region:\n" );
  printf ( "\n" );
/*
  INSIDE THE PARALLEL REGION, have each thread say hello.
  The thread number lives in a block-local variable, so every thread
  has its own copy.
*/
# pragma omp parallel
  {
    int thread_id = omp_get_thread_num ( );
    printf (" Hello from process %d\n", thread_id );
  }
/*
  Finish up by measuring the elapsed time.
*/
  t_elapsed = omp_get_wtime ( ) - t_begin;

  printf ( "\n" );
  printf ( " Back OUTSIDE the parallel region.\n" );
/*
  Terminate.
*/
  printf ( "\n" );
  printf ( "HELLO_OPENMP\n" );
  printf ( " Normal end of execution.\n" );

  printf ( "\n" );
  printf ( " Elapsed wall clock time = %f\n", t_elapsed );

  return 0;
}
KernelDepth( "kernelDepth" ) , AdaptiveExponent( "adaptiveExp" , 1 ) , MinIters( "minIters" , 24 ) , FixedIters( "iters" , -1 ) , VoxelDepth( "voxelDepth" , -1 ) , #if 1 #ifdef _WIN32 #pragma message( "[WARNING] Setting default min-depth to 5" ) #endif // _WIN32 MinDepth( "minDepth" , 5 ) , #else MinDepth( "minDepth" , 0 ) , #endif MaxSolveDepth( "maxSolveDepth" ) , BoundaryType( "boundary" , 1 ) , Threads( "threads" , omp_get_num_procs() ); cmdLineFloat SamplesPerNode( "samplesPerNode" , 1.f ) , Scale( "scale" , 1.1f ) , SolverAccuracy( "accuracy" , float(1e-3) ) , PointWeight( "pointWeight" , 4.f ); cmdLineReadable* params[] = { &In , &Depth , &Out , &XForm , &SolverDivide , &IsoDivide , &Scale , &Verbose , &SolverAccuracy , &NoComments , &KernelDepth , &SamplesPerNode , &Confidence , &NonManifold , &PolygonMesh , &ASCII , &ShowResidual , &MinIters , &FixedIters , &VoxelDepth , &PointWeight , &VoxelGrid , &Threads , &MinDepth , &MaxSolveDepth , &AdaptiveExponent , &BoundaryType ,
// Main int main(int argc, char* argv[]) { // Disabling Bzz OpenMP //bzzOpenMP = 0; // Dictionary setup OpenSMOKE_KPP_Dictionary dictionary; dictionary.ParseFile("Input.inp"); // Read input file OpenSMOKE_KPP_DataManager data(dictionary); // OpenMP //omp_set_dynamic(false); //omp_set_num_threads(12); cout << "Number of user defined threads: " << data.nThreads() << endl; cout << "Number of current threads: " << omp_get_num_threads() << endl; cout << "Master thread id: " << omp_get_thread_num() << endl; cout << "Number of processes: " << omp_get_num_procs() << endl; cout << "Max number of threads: " << omp_get_max_threads() << endl; // Initialize LIS lis_initialize(&argc, &argv); // Gas mixture setup cout << "Start" << endl; //getchar(); OpenSMOKE_ReactingGas* mix; if (data.iSaveKineticConstants() == false) { mix = new OpenSMOKE_ReactingGas[data.nThreads()]; for (int k=0;k<data.nThreads();k++) mix[k].SetupBinary(dictionary.kinetics()); } else { mix = new OpenSMOKE_ReactingGas[1]; mix[0].SetupBinary(dictionary.kinetics()); } cout << "Done: Mixture" << endl; //getchar(); // Reactor network setup OpenSMOKE_KPP_ReactorNetwork network(*mix, data, fLog, fWarning); cout << "Done: Network" << endl; //getchar(); network.ReadFirstGuess(); cout << "Done: FirstGuess" << endl; //getchar(); network.ReadTopology(); cout << "Done: Topology" << endl; //getchar(); network.BuildNetwork(); // Initial Analysis InputOutputAnalysis(*mix, network); MassUmbalances(network); string log_string = data.nameFolderOutput() + "/Log.out"; fLog.open( log_string.c_str(), ios::out); fLog.setf(ios::scientific); string warning_string = data.nameFolderOutput() + "/Warning.out"; fWarning.open( warning_string.c_str(), ios::out); fWarning.setf(ios::scientific); globalIteration = 1; // Without reactions if (data.iReactions() == false) { network.SolveWithoutReactions(); network.WriteMassFractionMap(); } // With reactions if (data.iReactions() == true) { double timeStartTotal = BzzGetCpuTime(); // 
SequenceCSTR(network, data); // PredictorCorrector(network, data); // GlobalODE(network, data); // GlobalNewtonMethod(network, data); globalODELinearSystemAbsoluteTolerance = 1.e-11; globalODELinearSystemRelativeTolerance = 1.e-9; globalNLSLinearSystemAbsoluteTolerance = 1.e-15; globalNLSLinearSystemRelativeTolerance = 1.e-12; int iFlagSequence; int iFlagODE; int iFlagNewtonMethod; for(int jj=1;jj<=3;jj++) { iFlagSequence = SequenceCSTR(network, data); iFlagODE = GlobalODE(network, data); globalODELinearSystemAbsoluteTolerance /= 10.; globalODELinearSystemRelativeTolerance /= 10.; } iFlagNewtonMethod = GlobalNewtonMethod(network, data); for(int jj=1;jj<=2;jj++) { globalODELinearSystemAbsoluteTolerance /= 10.; globalODELinearSystemRelativeTolerance /= 10.; iFlagSequence = SequenceCSTR(network, data); iFlagODE = GlobalODE(network, data); iFlagNewtonMethod = GlobalNewtonMethod(network, data); } double timeEndTotal = BzzGetCpuTime(); // Final Analysis InputOutputAnalysis(*mix, network); MassUmbalances(network); cout << "Total CPU Time: " << timeEndTotal - timeStartTotal << endl; } }
/*
 * Set_XC_Grid: evaluate the exchange-correlation quantity selected by
 * XC_P_switch on every local grid point and store it in Vxc_Grid.
 *
 *   XC_P_switch  which quantity is written (see the table below)
 *   XC_switch    which functional is used:
 *                  1  LDA  (Ceperley-Alder)
 *                  2  LSDA (Ceperley-Alder)
 *                  3  LSDA (PW91 with zero gradient)
 *                  4  GGA  (PBE)
 *                any other value leaves Vxc_Grid untouched in the main loop
 *
 * Reads the global Density_Grid (plus PCCDensity_Grid when PCC_switch==1) and
 * writes Vxc_Grid[0] and Vxc_Grid[1]; in the non-collinear case
 * (SpinP_switch==3 and XC_P_switch!=2) Vxc_Grid[2] and Vxc_Grid[3] are
 * written as well. For GGA, temporary gradient arrays are allocated at the
 * top and released at the bottom of the function.
 */
void Set_XC_Grid(int XC_P_switch, int XC_switch)
{
  /****************************************************
   XC_P_switch:
      0  \epsilon_XC (XC energy density)
      1  \mu_XC      (XC potential)
      2  \epsilon_XC - \mu_XC
  ****************************************************/

  int MN,MN1,MN2,i,j,k,ri,ri1,ri2;
  int i1,i2,j1,j2,k1,k2,n,nmax;
  double den_min=1.0e-14;
  double Ec_unif[1],Vc_unif[2],Exc[2],Vxc[2];
  double Ex_unif[1],Vx_unif[2],tot_den;
  double ED[2],GDENS[3][2];
  double DEXDD[2],DECDD[2];
  double DEXDGD[3][2],DECDGD[3][2];
  double ***dEXC_dGD,***dDen_Grid;
  double up_x_a,up_x_b,up_x_c;
  double up_y_a,up_y_b,up_y_c;
  double up_z_a,up_z_b,up_z_c;
  double dn_x_a,dn_x_b,dn_x_c;
  double dn_y_a,dn_y_b,dn_y_c;
  double dn_z_a,dn_z_b,dn_z_c;
  double up_a,up_b,up_c;
  double dn_a,dn_b,dn_c;
  double tmp0,tmp1;
  double cot,sit,sip,cop,phi,theta;
  double detA,igtv[4][4];
  int numprocs,myid;
  /* for OpenMP */
  int OMPID,Nthrds,Nprocs;

  /****************************************************
   when GGA, allocation
   double dEXC_dGD[2][3][My_NumGrid1]
   double dDen_Grid[2][3][My_NumGrid1]
  ****************************************************/

  /* MPI */
  MPI_Comm_size(mpi_comm_level1,&numprocs);
  MPI_Comm_rank(mpi_comm_level1,&myid);

  if (XC_switch==4){

    /* density gradient: [spin][cartesian component][grid point], zero-filled */
    dDen_Grid = (double***)malloc(sizeof(double**)*2);
    for (k=0; k<=1; k++){
      dDen_Grid[k] = (double**)malloc(sizeof(double*)*3);
      for (i=0; i<3; i++){
        dDen_Grid[k][i] = (double*)malloc(sizeof(double)*My_NumGrid1);
        for (j=0; j<My_NumGrid1; j++) dDen_Grid[k][i][j] = 0.0;
      }
    }

    /* derivative of the XC functional w.r.t. the density gradient;
       only needed when a potential is requested */
    if (XC_P_switch!=0){
      dEXC_dGD = (double***)malloc(sizeof(double**)*2);
      for (k=0; k<=1; k++){
        dEXC_dGD[k] = (double**)malloc(sizeof(double*)*3);
        for (i=0; i<3; i++){
          dEXC_dGD[k][i] = (double*)malloc(sizeof(double)*My_NumGrid1);
          for (j=0; j<My_NumGrid1; j++) dEXC_dGD[k][i][j] = 0.0;
        }
      }
    }

    /* PrintMemory */
    /* NOTE(review): the dEXC_dGD size is reported even when XC_P_switch==0,
       in which case that array is not allocated above. */
    PrintMemory("Set_XC_Grid: dDen_Grid", sizeof(double)*6*My_NumGrid1, NULL);
    PrintMemory("Set_XC_Grid: dEXC_dGD", sizeof(double)*6*My_NumGrid1, NULL);

    /****************************************************
                     calculate dDen_Grid
    ****************************************************/

    /* determinant of the 3x3 matrix gtv[1..3][1..3] (indices start at 1) */
    detA =   gtv[1][1]*gtv[2][2]*gtv[3][3]
           + gtv[1][2]*gtv[2][3]*gtv[3][1]
           + gtv[1][3]*gtv[2][1]*gtv[3][2]
           - gtv[1][3]*gtv[2][2]*gtv[3][1]
           - gtv[1][2]*gtv[2][1]*gtv[3][3]
           - gtv[1][1]*gtv[2][3]*gtv[3][2];

    /* igtv holds cofactors of gtv divided by detA; it is used below to map
       finite differences along the a, b, c grid axes to x, y, z derivatives */
    igtv[1][1] =  (gtv[2][2]*gtv[3][3] - gtv[2][3]*gtv[3][2])/detA;
    igtv[2][1] = -(gtv[2][1]*gtv[3][3] - gtv[2][3]*gtv[3][1])/detA;
    igtv[3][1] =  (gtv[2][1]*gtv[3][2] - gtv[2][2]*gtv[3][1])/detA;

    igtv[1][2] = -(gtv[1][2]*gtv[3][3] - gtv[1][3]*gtv[3][2])/detA;
    igtv[2][2] =  (gtv[1][1]*gtv[3][3] - gtv[1][3]*gtv[3][1])/detA;
    igtv[3][2] = -(gtv[1][1]*gtv[3][2] - gtv[1][2]*gtv[3][1])/detA;

    igtv[1][3] =  (gtv[1][2]*gtv[2][3] - gtv[1][3]*gtv[2][2])/detA;
    igtv[2][3] = -(gtv[1][1]*gtv[2][3] - gtv[1][3]*gtv[2][1])/detA;
    igtv[3][3] =  (gtv[1][1]*gtv[2][2] - gtv[1][2]*gtv[2][1])/detA;

#pragma omp parallel shared(igtv,dDen_Grid,PCCDensity_Grid,PCC_switch,Density_Grid,den_min,My_Cell0,My_Cell1,Ngrid3,Ngrid2,Num_Cells0) private(OMPID,Nthrds,Nprocs,nmax,n,i,j,k,ri,ri1,ri2,i1,i2,j1,j2,k1,k2,MN,MN1,MN2,up_a,dn_a,up_b,dn_b,up_c,dn_c)
    {

      OMPID = omp_get_thread_num();
      Nthrds = omp_get_num_threads();
      Nprocs = omp_get_num_procs();

      nmax = Num_Cells0*Ngrid2*Ngrid3;

      /* each thread handles one contiguous slice of the flattened (i,j,k) index.
         NOTE(review): OMPID*nmax is evaluated in int and could overflow for
         very large local grids - confirm the expected grid sizes. */
      for (n=OMPID*nmax/Nthrds; n<(OMPID+1)*nmax/Nthrds; n++){

        /* unflatten n into the local (i,j,k) indices */
        i = n/(Ngrid2*Ngrid3);
        j = (n-i*Ngrid2*Ngrid3)/Ngrid3;
        k = n - i*Ngrid2*Ngrid3 - j*Ngrid3;
        ri = My_Cell1[i];

        /* find ri1, ri2, i1, and i2 */
        /* neighbours along the a-axis wrap around at 0 and Ngrid1-1 */

        if (ri==0){
          ri1 = Ngrid1 - 1;
          ri2 = 1;
          i1 = My_Cell0[ri1];
          i2 = My_Cell0[ri2];
        }
        else if (ri==(Ngrid1-1)){
          ri1 = Ngrid1 - 2;
          ri2 = 0;
          i1 = My_Cell0[ri1];
          i2 = My_Cell0[ri2];
        }
        else{
          ri1 = ri - 1;
          ri2 = ri + 1;
          i1 = My_Cell0[ri1];
          i2 = My_Cell0[ri2];
        }

        /* because we have +-1 buffer cells. */
        /* points whose a-axis neighbours are not held locally (index -1) are skipped */

        if (i1!=-1 && i2!=-1){

          /* find j1 and j2 */

          if (j==0){
            j1 = Ngrid2 - 1;
            j2 = 1;
          }
          else if (j==(Ngrid2-1)){
            j1 = Ngrid2 - 2;
            j2 = 0;
          }
          else{
            j1 = j - 1;
            j2 = j + 1;
          }

          /* find k1 and k2 */

          if (k==0){
            k1 = Ngrid3 - 1;
            k2 = 1;
          }
          else if (k==(Ngrid3-1)){
            k1 = Ngrid3 - 2;
            k2 = 0;
          }
          else{
            k1 = k - 1;
            k2 = k + 1;
          }

          /* set MN */

          MN = i*Ngrid2*Ngrid3 + j*Ngrid3 + k;

          /* set dDen_Grid */

          if ( den_min<(Density_Grid[0][MN]+Density_Grid[1][MN]) ){

            /* NOTE(review): up_* / dn_* are only assigned for PCC_switch 0 or 1;
               any other value would leave them unset - confirm the valid range. */

            /* a-axis */

            MN1 = i1*Ngrid2*Ngrid3 + j*Ngrid3 + k;
            MN2 = i2*Ngrid2*Ngrid3 + j*Ngrid3 + k;

            if (PCC_switch==0) {
              up_a = Density_Grid[0][MN2] - Density_Grid[0][MN1];
              dn_a = Density_Grid[1][MN2] - Density_Grid[1][MN1];
            }
            else if (PCC_switch==1) {
              up_a = Density_Grid[0][MN2] + PCCDensity_Grid[MN2]
                   - Density_Grid[0][MN1] - PCCDensity_Grid[MN1];
              dn_a = Density_Grid[1][MN2] + PCCDensity_Grid[MN2]
                   - Density_Grid[1][MN1] - PCCDensity_Grid[MN1];
            }

            /* b-axis */

            MN1 = i*Ngrid2*Ngrid3 + j1*Ngrid3 + k;
            MN2 = i*Ngrid2*Ngrid3 + j2*Ngrid3 + k;

            if (PCC_switch==0) {
              up_b = Density_Grid[0][MN2] - Density_Grid[0][MN1];
              dn_b = Density_Grid[1][MN2] - Density_Grid[1][MN1];
            }
            else if (PCC_switch==1) {
              up_b = Density_Grid[0][MN2] + PCCDensity_Grid[MN2]
                   - Density_Grid[0][MN1] - PCCDensity_Grid[MN1];
              dn_b = Density_Grid[1][MN2] + PCCDensity_Grid[MN2]
                   - Density_Grid[1][MN1] - PCCDensity_Grid[MN1];
            }

            /* c-axis */

            MN1 = i*Ngrid2*Ngrid3 + j*Ngrid3 + k1;
            MN2 = i*Ngrid2*Ngrid3 + j*Ngrid3 + k2;

            if (PCC_switch==0) {
              up_c = Density_Grid[0][MN2] - Density_Grid[0][MN1];
              dn_c = Density_Grid[1][MN2] - Density_Grid[1][MN1];
            }
            else if (PCC_switch==1) {
              up_c = Density_Grid[0][MN2] + PCCDensity_Grid[MN2]
                   - Density_Grid[0][MN1] - PCCDensity_Grid[MN1];
              dn_c = Density_Grid[1][MN2] + PCCDensity_Grid[MN2]
                   - Density_Grid[1][MN1] - PCCDensity_Grid[MN1];
            }

            /* the factor 0.5 belongs to the central difference (value at +1 minus value at -1) */

            /* up */

            dDen_Grid[0][0][MN] = 0.5*(igtv[1][1]*up_a + igtv[1][2]*up_b + igtv[1][3]*up_c);
            dDen_Grid[0][1][MN] = 0.5*(igtv[2][1]*up_a + igtv[2][2]*up_b + igtv[2][3]*up_c);
            dDen_Grid[0][2][MN] = 0.5*(igtv[3][1]*up_a + igtv[3][2]*up_b + igtv[3][3]*up_c);

            /* down */

            dDen_Grid[1][0][MN] = 0.5*(igtv[1][1]*dn_a + igtv[1][2]*dn_b + igtv[1][3]*dn_c);
            dDen_Grid[1][1][MN] = 0.5*(igtv[2][1]*dn_a + igtv[2][2]*dn_b + igtv[2][3]*dn_c);
            dDen_Grid[1][2][MN] = 0.5*(igtv[3][1]*dn_a + igtv[3][2]*dn_b + igtv[3][3]*dn_c);
          }

          else{
            /* total density not above den_min: gradient is set to zero */
            dDen_Grid[0][0][MN] = 0.0;
            dDen_Grid[0][1][MN] = 0.0;
            dDen_Grid[0][2][MN] = 0.0;
            dDen_Grid[1][0][MN] = 0.0;
            dDen_Grid[1][1][MN] = 0.0;
            dDen_Grid[1][2][MN] = 0.0;
          }

        } /* if (i1!=-1 && i2!=-1) */
      } /* n */

#pragma omp flush(dDen_Grid)

    } /* #pragma omp parallel */
  } /* if (XC_switch==4) */

  /****************************************************
                         loop MN
  ****************************************************/

  /* dEXC_dGD and dDen_Grid are only dereferenced inside case 4 below */

#pragma omp parallel shared(dDen_Grid,dEXC_dGD,den_min,Vxc_Grid,My_NumGrid1,XC_P_switch,XC_switch,Density_Grid,PCC_switch,PCCDensity_Grid) private(OMPID,Nthrds,Nprocs,MN,tot_den,tmp0,ED,Exc,Ec_unif,Vc_unif,Vxc,Ex_unif,Vx_unif,GDENS,DEXDD,DECDD,DEXDGD,DECDGD)
  {

    OMPID = omp_get_thread_num();
    Nthrds = omp_get_num_threads();
    Nprocs = omp_get_num_procs();

    /* contiguous slice of the local grid per thread.
       NOTE(review): OMPID*My_NumGrid1 is an int product - same overflow caveat as above. */
    for (MN=OMPID*My_NumGrid1/Nthrds; MN<(OMPID+1)*My_NumGrid1/Nthrds; MN++){

      switch(XC_switch){

        /******************************************************************
         LDA (Ceperly-Alder)

         constructed by Ceperly and Alder,
         ref.
         D. M. Ceperley, Phys. Rev. B18, 3126 (1978)
         D. M. Ceperley and B. J. Alder, Phys. Rev. Lett., 45, 566 (1980)

         and parametrized by Perdew and Zunger.
         ref.
         J. Perdew and A. Zunger, Phys. Rev. B23, 5048 (1981)
        ******************************************************************/

      case 1:

        tot_den = Density_Grid[0][MN] + Density_Grid[1][MN];

        /* partial core correction */
        /* PCCDensity_Grid is added once per spin channel, hence the factor 2 */
        if (PCC_switch==1) {
          tot_den += PCCDensity_Grid[MN]*2.0;
        }

        /* both spin channels receive the same value */
        tmp0 = XC_Ceperly_Alder(tot_den,XC_P_switch);
        Vxc_Grid[0][MN] = tmp0;
        Vxc_Grid[1][MN] = tmp0;

        break;

        /******************************************************************
         LSDA-CA (Ceperly-Alder)

         constructed by Ceperly and Alder,
         ref.
         D. M. Ceperley, Phys. Rev. B18, 3126 (1978)
         D. M. Ceperley and B. J. Alder, Phys. Rev. Lett., 45, 566 (1980)

         and parametrized by Perdew and Zunger.
         ref.
         J. Perdew and A. Zunger, Phys. Rev. B23, 5048 (1981)
        ******************************************************************/

      case 2:

        ED[0] = Density_Grid[0][MN];
        ED[1] = Density_Grid[1][MN];

        /* partial core correction */
        if (PCC_switch==1) {
          ED[0] += PCCDensity_Grid[MN];
          ED[1] += PCCDensity_Grid[MN];
        }

        XC_CA_LSDA(ED[0], ED[1], Exc, XC_P_switch);
        Vxc_Grid[0][MN] = Exc[0];
        Vxc_Grid[1][MN] = Exc[1];

        break;

        /******************************************************************
         LSDA-PW (PW91)
         used as Grad\rho = 0 in their GGA formalism

         ref.
         J.P.Perdew and Yue Wang, Phys. Rev. B45, 13244 (1992)
        ******************************************************************/

      case 3:

        ED[0] = Density_Grid[0][MN];
        ED[1] = Density_Grid[1][MN];

        /* partial core correction */
        if (PCC_switch==1) {
          ED[0] += PCCDensity_Grid[MN];
          ED[1] += PCCDensity_Grid[MN];
        }

        /* here the den_min test is applied after the partial core correction */
        if ((ED[0]+ED[1])<den_min){
          Vxc_Grid[0][MN] = 0.0;
          Vxc_Grid[1][MN] = 0.0;
        }
        else{

          if (XC_P_switch==0){

            /* energy density: correlation in Exc[0], exchange accumulated in Exc[1] */
            XC_PW91C(ED,Ec_unif,Vc_unif);

            Vxc[0] = Vc_unif[0];
            Vxc[1] = Vc_unif[1];
            Exc[0] = Ec_unif[0];

            XC_EX(1,2.0*ED[0],ED,Ex_unif,Vx_unif);
            Vxc[0] = Vxc[0] + Vx_unif[0];
            Exc[1] = 2.0*ED[0]*Ex_unif[0];

            XC_EX(1,2.0*ED[1],ED,Ex_unif,Vx_unif);
            Vxc[1] += Vx_unif[0];
            Exc[1] += 2.0*ED[1]*Ex_unif[0];

            Exc[1] = 0.5*Exc[1]/(ED[0]+ED[1]);

            Vxc_Grid[0][MN] = Exc[0] + Exc[1];
            Vxc_Grid[1][MN] = Exc[0] + Exc[1];
          }

          else if (XC_P_switch==1){

            /* potential: correlation part first, exchange part added per spin */
            XC_PW91C(ED,Ec_unif,Vc_unif);
            Vxc_Grid[0][MN] = Vc_unif[0];
            Vxc_Grid[1][MN] = Vc_unif[1];

            XC_EX(1,2.0*ED[0],ED,Ex_unif,Vx_unif);
            Vxc_Grid[0][MN] = Vxc_Grid[0][MN] + Vx_unif[0];

            XC_EX(1,2.0*ED[1],ED,Ex_unif,Vx_unif);
            Vxc_Grid[1][MN] = Vxc_Grid[1][MN] + Vx_unif[0];
          }

          else if (XC_P_switch==2){

            /* energy density minus potential */
            XC_PW91C(ED,Ec_unif,Vc_unif);

            Vxc[0] = Vc_unif[0];
            Vxc[1] = Vc_unif[1];
            Exc[0] = Ec_unif[0];

            XC_EX(1,2.0*ED[0],ED,Ex_unif,Vx_unif);
            Vxc[0] = Vxc[0] + Vx_unif[0];
            Exc[1] = 2.0*ED[0]*Ex_unif[0];

            XC_EX(1,2.0*ED[1],ED,Ex_unif,Vx_unif);
            Vxc[1] += Vx_unif[0];
            Exc[1] += 2.0*ED[1]*Ex_unif[0];

            Exc[1] = 0.5*Exc[1]/(ED[0]+ED[1]);

            Vxc_Grid[0][MN] = Exc[0] + Exc[1] - Vxc[0];
            Vxc_Grid[1][MN] = Exc[0] + Exc[1] - Vxc[1];
          }
        }

        break;

        /******************************************************************
         GGA-PBE
         ref.
         J. P. Perdew, K. Burke, and M. Ernzerhof,
         Phys. Rev. Lett. 77, 3865 (1996).
        ******************************************************************/

      case 4:

        /****************************************************
         ED[0]       density of up spin:     n_up
         ED[1]       density of down spin:   n_down

         GDENS[0][0] derivative (x) of density of up spin
         GDENS[1][0] derivative (y) of density of up spin
         GDENS[2][0] derivative (z) of density of up spin
         GDENS[0][1] derivative (x) of density of down spin
         GDENS[1][1] derivative (y) of density of down spin
         GDENS[2][1] derivative (z) of density of down spin

         DEXDD[0]    d(fx)/d(n_up)
         DEXDD[1]    d(fx)/d(n_down)
         DECDD[0]    d(fc)/d(n_up)
         DECDD[1]    d(fc)/d(n_down)

         n'_up_x   = d(n_up)/d(x)
         n'_up_y   = d(n_up)/d(y)
         n'_up_z   = d(n_up)/d(z)
         n'_down_x = d(n_down)/d(x)
         n'_down_y = d(n_down)/d(y)
         n'_down_z = d(n_down)/d(z)

         DEXDGD[0][0] d(fx)/d(n'_up_x)
         DEXDGD[1][0] d(fx)/d(n'_up_y)
         DEXDGD[2][0] d(fx)/d(n'_up_z)
         DEXDGD[0][1] d(fx)/d(n'_down_x)
         DEXDGD[1][1] d(fx)/d(n'_down_y)
         DEXDGD[2][1] d(fx)/d(n'_down_z)

         DECDGD[0][0] d(fc)/d(n'_up_x)
         DECDGD[1][0] d(fc)/d(n'_up_y)
         DECDGD[2][0] d(fc)/d(n'_up_z)
         DECDGD[0][1] d(fc)/d(n'_down_x)
         DECDGD[1][1] d(fc)/d(n'_down_y)
         DECDGD[2][1] d(fc)/d(n'_down_z)
        ****************************************************/

        ED[0] = Density_Grid[0][MN];
        ED[1] = Density_Grid[1][MN];

        /* unlike case 3, the den_min test here uses the density before the
           partial core correction is added */
        if ((ED[0]+ED[1])<den_min){
          Vxc_Grid[0][MN] = 0.0;
          Vxc_Grid[1][MN] = 0.0;

          /* later add its derivatives */
          if (XC_P_switch!=0){
            dEXC_dGD[0][0][MN] = 0.0;
            dEXC_dGD[0][1][MN] = 0.0;
            dEXC_dGD[0][2][MN] = 0.0;

            dEXC_dGD[1][0][MN] = 0.0;
            dEXC_dGD[1][1][MN] = 0.0;
            dEXC_dGD[1][2][MN] = 0.0;
          }
        }

        else{

          /* note the index swap: dDen_Grid is [spin][xyz], GDENS is [xyz][spin] */
          GDENS[0][0] = dDen_Grid[0][0][MN];
          GDENS[1][0] = dDen_Grid[0][1][MN];
          GDENS[2][0] = dDen_Grid[0][2][MN];
          GDENS[0][1] = dDen_Grid[1][0][MN];
          GDENS[1][1] = dDen_Grid[1][1][MN];
          GDENS[2][1] = dDen_Grid[1][2][MN];

          if (PCC_switch==1) {
            ED[0] += PCCDensity_Grid[MN];
            ED[1] += PCCDensity_Grid[MN];
          }

          XC_PBE(ED, GDENS, Exc, DEXDD, DECDD, DEXDGD, DECDGD);

          /* XC energy density */
          if (XC_P_switch==0){
            Vxc_Grid[0][MN] = Exc[0] + Exc[1];
            Vxc_Grid[1][MN] = Exc[0] + Exc[1];
          }

          /* XC potential */
          else if (XC_P_switch==1){
            Vxc_Grid[0][MN] = DEXDD[0] + DECDD[0];
            Vxc_Grid[1][MN] = DEXDD[1] + DECDD[1];
          }

          /* XC energy density - XC potential */
          else if (XC_P_switch==2){
            Vxc_Grid[0][MN] = Exc[0] + Exc[1] - DEXDD[0] - DECDD[0];
            Vxc_Grid[1][MN] = Exc[0] + Exc[1] - DEXDD[1] - DECDD[1];
          }

          /* later add its derivatives */
          if (XC_P_switch!=0){
            dEXC_dGD[0][0][MN] = DEXDGD[0][0] + DECDGD[0][0];
            dEXC_dGD[0][1][MN] = DEXDGD[1][0] + DECDGD[1][0];
            dEXC_dGD[0][2][MN] = DEXDGD[2][0] + DECDGD[2][0];

            dEXC_dGD[1][0][MN] = DEXDGD[0][1] + DECDGD[0][1];
            dEXC_dGD[1][1][MN] = DEXDGD[1][1] + DECDGD[1][1];
            dEXC_dGD[1][2][MN] = DEXDGD[2][1] + DECDGD[2][1];
          }
        }

        break;

      } /* switch(XC_switch) */
    } /* MN */

#pragma omp flush(dEXC_dGD)

  } /* #pragma omp parallel */

  /****************************************************
        calculate the second part of XC potential
               when GGA and XC_P_switch!=0
  ****************************************************/

  if (XC_switch==4 && XC_P_switch!=0){

#pragma omp parallel shared(XC_P_switch,Vxc_Grid,igtv,dEXC_dGD,Density_Grid,den_min,My_Cell0,My_Cell1,Num_Cells0,Ngrid2,Ngrid3) private(OMPID,Nthrds,Nprocs,nmax,n,i,j,k,ri,ri1,ri2,i1,i2,j1,j2,k1,k2,MN,MN1,MN2,up_x_a,up_y_a,up_z_a,dn_x_a,dn_y_a,dn_z_a,up_x_b,up_y_b,up_z_b,dn_x_b,dn_y_b,dn_z_b,up_x_c,up_y_c,up_z_c,dn_x_c,dn_y_c,dn_z_c,tmp0,tmp1)
    {

      OMPID = omp_get_thread_num();
      Nthrds = omp_get_num_threads();
      Nprocs = omp_get_num_procs();

      nmax = Num_Cells0*Ngrid2*Ngrid3;

      /* same per-thread slicing and neighbour lookup as in the dDen_Grid pass */
      for (n=OMPID*nmax/Nthrds; n<(OMPID+1)*nmax/Nthrds; n++){

        i = n/(Ngrid2*Ngrid3);
        j = (n-i*Ngrid2*Ngrid3)/Ngrid3;
        k = n - i*Ngrid2*Ngrid3 - j*Ngrid3;
        ri = My_Cell1[i];

        /* find ri1, ri2, i1, and i2 */

        if (ri==0){
          ri1 = Ngrid1 - 1;
          ri2 = 1;
          i1 = My_Cell0[ri1];
          i2 = My_Cell0[ri2];
        }
        else if (ri==(Ngrid1-1)){
          ri1 = Ngrid1 - 2;
          ri2 = 0;
          i1 = My_Cell0[ri1];
          i2 = My_Cell0[ri2];
        }
        else{
          ri1 = ri - 1;
          ri2 = ri + 1;
          i1 = My_Cell0[ri1];
          i2 = My_Cell0[ri2];
        }

        if (i1!=-1 && i2!=-1){

          /* find j1 and j2 */

          if (j==0){
            j1 = Ngrid2 - 1;
            j2 = 1;
          }
          else if (j==(Ngrid2-1)){
            j1 = Ngrid2 - 2;
            j2 = 0;
          }
          else{
            j1 = j - 1;
            j2 = j + 1;
          }

          /* find k1 and k2 */

          if (k==0){
            k1 = Ngrid3 - 1;
            k2 = 1;
          }
          else if (k==(Ngrid3-1)){
            k1 = Ngrid3 - 2;
            k2 = 0;
          }
          else{
            k1 = k - 1;
            k2 = k + 1;
          }

          /* set MN */

          MN = i*Ngrid2*Ngrid3 + j*Ngrid3 + k;

          /* set Vxc_Grid */

          if ( den_min<(Density_Grid[0][MN]+Density_Grid[1][MN]) ){

            /* a-axis */

            MN1 = i1*Ngrid2*Ngrid3 + j*Ngrid3 + k;
            MN2 = i2*Ngrid2*Ngrid3 + j*Ngrid3 + k;

            up_x_a = dEXC_dGD[0][0][MN2] - dEXC_dGD[0][0][MN1];
            up_y_a = dEXC_dGD[0][1][MN2] - dEXC_dGD[0][1][MN1];
            up_z_a = dEXC_dGD[0][2][MN2] - dEXC_dGD[0][2][MN1];

            dn_x_a = dEXC_dGD[1][0][MN2] - dEXC_dGD[1][0][MN1];
            dn_y_a = dEXC_dGD[1][1][MN2] - dEXC_dGD[1][1][MN1];
            dn_z_a = dEXC_dGD[1][2][MN2] - dEXC_dGD[1][2][MN1];

            /* b-axis */

            MN1 = i*Ngrid2*Ngrid3 + j1*Ngrid3 + k;
            MN2 = i*Ngrid2*Ngrid3 + j2*Ngrid3 + k;

            up_x_b = dEXC_dGD[0][0][MN2] - dEXC_dGD[0][0][MN1];
            up_y_b = dEXC_dGD[0][1][MN2] - dEXC_dGD[0][1][MN1];
            up_z_b = dEXC_dGD[0][2][MN2] - dEXC_dGD[0][2][MN1];

            dn_x_b = dEXC_dGD[1][0][MN2] - dEXC_dGD[1][0][MN1];
            dn_y_b = dEXC_dGD[1][1][MN2] - dEXC_dGD[1][1][MN1];
            dn_z_b = dEXC_dGD[1][2][MN2] - dEXC_dGD[1][2][MN1];

            /* c-axis */

            MN1 = i*Ngrid2*Ngrid3 + j*Ngrid3 + k1;
            MN2 = i*Ngrid2*Ngrid3 + j*Ngrid3 + k2;

            up_x_c = dEXC_dGD[0][0][MN2] - dEXC_dGD[0][0][MN1];
            up_y_c = dEXC_dGD[0][1][MN2] - dEXC_dGD[0][1][MN1];
            up_z_c = dEXC_dGD[0][2][MN2] - dEXC_dGD[0][2][MN1];

            dn_x_c = dEXC_dGD[1][0][MN2] - dEXC_dGD[1][0][MN1];
            dn_y_c = dEXC_dGD[1][1][MN2] - dEXC_dGD[1][1][MN1];
            dn_z_c = dEXC_dGD[1][2][MN2] - dEXC_dGD[1][2][MN1];

            /* tmp0 / tmp1: sum of the x-derivative of the x component, the
               y-derivative of the y component and the z-derivative of the
               z component of dEXC_dGD, by central differences */

            /* up */

            tmp0 = igtv[1][1]*up_x_a + igtv[1][2]*up_x_b + igtv[1][3]*up_x_c
                 + igtv[2][1]*up_y_a + igtv[2][2]*up_y_b + igtv[2][3]*up_y_c
                 + igtv[3][1]*up_z_a + igtv[3][2]*up_z_b + igtv[3][3]*up_z_c;
            tmp0 = 0.5*tmp0;

            /* down */

            tmp1 = igtv[1][1]*dn_x_a + igtv[1][2]*dn_x_b + igtv[1][3]*dn_x_c
                 + igtv[2][1]*dn_y_a + igtv[2][2]*dn_y_b + igtv[2][3]*dn_y_c
                 + igtv[3][1]*dn_z_a + igtv[3][2]*dn_z_b + igtv[3][3]*dn_z_c;
            tmp1 = 0.5*tmp1;

            /* XC potential */

            if (XC_P_switch==1){
              Vxc_Grid[0][MN] -= tmp0;
              Vxc_Grid[1][MN] -= tmp1;
            }

            /* XC energy density - XC potential */
            /* sign is flipped because the potential enters with a minus sign */

            else if (XC_P_switch==2){
              Vxc_Grid[0][MN] += tmp0;
              Vxc_Grid[1][MN] += tmp1;
            }

          }
        }
      }

#pragma omp flush(Vxc_Grid)

    } /* #pragma omp parallel */
  } /* if (XC_switch==4 && XC_P_switch!=0) */

  /****************************************************
            In case of non-collinear spin DFT
  ****************************************************/

  if (SpinP_switch==3 && XC_P_switch!=2){

#pragma omp parallel shared(Density_Grid,Vxc_Grid,My_NumGrid1) private(OMPID,Nthrds,Nprocs,MN,tmp0,tmp1,theta,phi,sit,cot,sip,cop)
    {

      OMPID = omp_get_thread_num();
      Nthrds = omp_get_num_threads();
      Nprocs = omp_get_num_procs();

      for (MN=OMPID*My_NumGrid1/Nthrds; MN<(OMPID+1)*My_NumGrid1/Nthrds; MN++){

        /* tmp0: spin average, tmp1: half the spin difference of the two
           channels computed above; Density_Grid[2] and [3] supply the angles */
        tmp0 = 0.5*(Vxc_Grid[0][MN] + Vxc_Grid[1][MN]);
        tmp1 = 0.5*(Vxc_Grid[0][MN] - Vxc_Grid[1][MN]);
        theta = Density_Grid[2][MN];
        phi   = Density_Grid[3][MN];

        sit = sin(theta);
        cot = cos(theta);
        sip = sin(phi);
        cop = cos(phi);

        Vxc_Grid[0][MN] =  tmp0 + cot*tmp1;  /* Re Vxc11 */
        Vxc_Grid[1][MN] =  tmp0 - cot*tmp1;  /* Re Vxc22 */
        Vxc_Grid[2][MN] =  tmp1*sit*cop;     /* Re Vxc12 */
        Vxc_Grid[3][MN] = -tmp1*sit*sip;     /* Im Vxc12 */
      }

#pragma omp flush(Vxc_Grid)

    } /* #pragma omp parallel */
  }

  /* disabled debugging dump of Vxc_Grid along the a-axis:
  {
    int hN1,hN2,hN3,i;
    double Re11,Re22,Re12,Im12;

    hN1 = Ngrid1/2;
    hN2 = Ngrid2/2;
    hN3 = Ngrid3/2;

    for (i=0; i<Num_Cells0; i++){

      MN = i*Ngrid2*Ngrid3 + hN2*Ngrid3 + hN3;

      Re11 = Vxc_Grid[0][MN];
      Re22 = Vxc_Grid[1][MN];
      Re12 = Vxc_Grid[2][MN];
      Im12 = Vxc_Grid[3][MN];

      printf("MN=%4d %15.12f %15.12f %15.12f %15.12f\n",
             MN,Re11,Re22,Re12,Im12);
    }
  }

  MPI_Finalize();
  exit(0);
  */

  /****************************************************
   In case of GGA, free arrays
   double dEXC_dGD[2][3][My_NumGrid1]
   double dDen_Grid[2][3][My_NumGrid1]
  ****************************************************/

  /* the conditions mirror the allocation at the top of the function */
  if (XC_switch==4){

    for (k=0; k<=1; k++){
      for (i=0; i<3; i++){
        free(dDen_Grid[k][i]);
      }
      free(dDen_Grid[k]);
    }
    free(dDen_Grid);

    if (XC_P_switch!=0){
      for (k=0; k<=1; k++){
        for (i=0; i<3; i++){
          free(dEXC_dGD[k][i]);
        }
        free(dEXC_dGD[k]);
      }
      free(dEXC_dGD);
    }
  }
}
inline void callFunction(mxArray* plhs[], const mxArray*prhs[], const int nlhs,const int nrhs) { if (!mexCheckType<T>(prhs[0])) mexErrMsgTxt("type of argument 1 is not consistent"); if (!mxIsStruct(prhs[1])) mexErrMsgTxt("argument 2 should be struct"); if (nrhs == 3) if (!mxIsStruct(prhs[2])) mexErrMsgTxt("argument 3 should be struct"); Data<T> *X; const mwSize* dimsX=mxGetDimensions(prhs[0]); int n=static_cast<int>(dimsX[0]); int M=static_cast<int>(dimsX[1]); if (mxIsSparse(prhs[0])) { double * X_v=static_cast<double*>(mxGetPr(prhs[0])); mwSize* X_r=mxGetIr(prhs[0]); mwSize* X_pB=mxGetJc(prhs[0]); mwSize* X_pE=X_pB+1; int* X_r2, *X_pB2, *X_pE2; T* X_v2; createCopySparse<T>(X_v2,X_r2,X_pB2,X_pE2, X_v,X_r,X_pB,X_pE,M); X = new SpMatrix<T>(X_v2,X_r2,X_pB2,X_pE2,n,M,X_pB2[M]); } else { T* prX = reinterpret_cast<T*>(mxGetPr(prhs[0])); X= new Matrix<T>(prX,n,M); } int NUM_THREADS = getScalarStructDef<int>(prhs[1],"numThreads",-1); #ifdef _OPENMP NUM_THREADS = NUM_THREADS == -1 ? omp_get_num_procs() : NUM_THREADS; #else NUM_THREADS=1; #endif int batch_size = getScalarStructDef<int>(prhs[1],"batchsize", 256*(NUM_THREADS+1)); mxArray* pr_D = mxGetField(prhs[1],0,"D"); Trainer<T>* trainer; if (!pr_D) { int K = getScalarStruct<int>(prhs[1],"K"); trainer = new Trainer<T>(K,batch_size,NUM_THREADS); } else { T* prD = reinterpret_cast<T*>(mxGetPr(pr_D)); const mwSize* dimsD=mxGetDimensions(pr_D); int nD=static_cast<int>(dimsD[0]); int K=static_cast<int>(dimsD[1]); if (n != nD) mexErrMsgTxt("sizes of D are not consistent"); Matrix<T> D1(prD,n,K); if (nrhs == 3) { mxArray* pr_A = mxGetField(prhs[2],0,"A"); if (!pr_A) mexErrMsgTxt("field A is not provided"); T* prA = reinterpret_cast<T*>(mxGetPr(pr_A)); const mwSize* dimsA=mxGetDimensions(pr_A); int xA=static_cast<int>(dimsA[0]); int yA=static_cast<int>(dimsA[1]); if (xA != K || yA != K) mexErrMsgTxt("Size of A is not consistent"); Matrix<T> A(prA,K,K); mxArray* pr_B = mxGetField(prhs[2],0,"B"); if (!pr_B) mexErrMsgTxt("field B is 
not provided"); T* prB = reinterpret_cast<T*>(mxGetPr(pr_B)); const mwSize* dimsB=mxGetDimensions(pr_B); int xB=static_cast<int>(dimsB[0]); int yB=static_cast<int>(dimsB[1]); if (xB != n || yB != K) mexErrMsgTxt("Size of B is not consistent"); Matrix<T> B(prB,n,K); int iter = getScalarStruct<int>(prhs[2],"iter"); trainer = new Trainer<T>(A,B,D1,iter,batch_size,NUM_THREADS); } else { trainer = new Trainer<T>(D1,batch_size,NUM_THREADS); } } ParamDictLearn<T> param; param.lambda = getScalarStruct<T>(prhs[1],"lambda"); param.lambda2 = getScalarStructDef<T>(prhs[1],"lambda2",10e-10); param.iter=getScalarStruct<int>(prhs[1],"iter"); param.t0 = getScalarStructDef<T>(prhs[1],"t0",1e-5); param.mode =(constraint_type)getScalarStructDef<int>(prhs[1],"mode",PENALTY); param.posAlpha = getScalarStructDef<bool>(prhs[1],"posAlpha",false); param.posD = getScalarStructDef<bool>(prhs[1],"posD",false); param.expand= getScalarStructDef<bool>(prhs[1],"expand",false); param.modeD=(constraint_type_D)getScalarStructDef<int>(prhs[1],"modeD",L2); param.whiten = getScalarStructDef<bool>(prhs[1],"whiten",false); param.clean = getScalarStructDef<bool>(prhs[1],"clean",true); param.verbose = getScalarStructDef<bool>(prhs[1],"verbose",true); param.gamma1 = getScalarStructDef<T>(prhs[1],"gamma1",0); param.gamma2 = getScalarStructDef<T>(prhs[1],"gamma2",0); param.rho = getScalarStructDef<T>(prhs[1],"rho",T(1.0)); param.stochastic = getScalarStructDef<bool>(prhs[1],"stochastic_deprecated", false); param.modeParam = static_cast<mode_compute>(getScalarStructDef<int>(prhs[1],"modeParam",0)); param.batch = getScalarStructDef<bool>(prhs[1],"batch",false); param.iter_updateD = getScalarStructDef<T>(prhs[1],"iter_updateD",param.batch ? 
5 : 1); param.log = getScalarStructDef<bool>(prhs[1],"log_deprecated", false); if (param.log) { mxArray *stringData = mxGetField(prhs[1],0, "logName_deprecated"); if (!stringData) mexErrMsgTxt("Missing field logName_deprecated"); int stringLength = mxGetN(stringData)+1; param.logName= new char[stringLength]; mxGetString(stringData,param.logName,stringLength); } trainer->train(*X,param); if (param.log) mxFree(param.logName); Matrix<T> D; trainer->getD(D); int K = D.n(); plhs[0] = createMatrix<T>(n,K); T* prD2 = reinterpret_cast<T*>(mxGetPr(plhs[0])); Matrix<T> D2(prD2,n,K); D2.copy(D); if (nlhs == 2) { mwSize dims[1] = {1}; int nfields=3; const char *names[] = {"A", "B", "iter"}; plhs[1]=mxCreateStructArray(1, dims,nfields, names); mxArray* prA = createMatrix<T>(K,K); T* pr_A= reinterpret_cast<T*>(mxGetPr(prA)); Matrix<T> A(pr_A,K,K); trainer->getA(A); mxSetField(plhs[1],0,"A",prA); mxArray* prB = createMatrix<T>(n,K); T* pr_B= reinterpret_cast<T*>(mxGetPr(prB)); Matrix<T> B(pr_B,n,K); trainer->getB(B); mxSetField(plhs[1],0,"B",prB); mxArray* priter = createScalar<T>(); *mxGetPr(priter) = static_cast<T>(trainer->getIter()); mxSetField(plhs[1],0,"iter",priter); } delete(trainer); delete(X); }
void Voronoi_Charge() { double time0; int Mc_AN,Gc_AN,Mh_AN,h_AN,Gh_AN; int Cwan,GNc,GRc,Nog,Nh,MN,spin; double x,y,z,dx,dy,dz,fw; double Cxyz[4]; double FuzzyW,sum0,sum1; double magx,magy,magz; double tmagx,tmagy,tmagz; double tden,tmag,theta,phi,rho,mag; double den0,den1,vol; double VC_S,T_VC0,T_VC1; double **VC,*Voronoi_Vol; double TStime,TEtime; double S_coordinate[3]; int numprocs,myid,tag=999,ID; FILE *fp_VC; char file_VC[YOUSO10]; char buf[fp_bsize]; /* setvbuf */ MPI_Status stat; MPI_Request request; /* for OpenMP */ int OMPID,Nthrds,Nprocs; MPI_Comm_size(mpi_comm_level1,&numprocs); MPI_Comm_rank(mpi_comm_level1,&myid); dtime(&TStime); if (myid==Host_ID) printf("\n<Voronoi_Charge> calculate Voronoi charges\n");fflush(stdout); /***************************************************** allocation of array *****************************************************/ VC = (double**)malloc(sizeof(double*)*4); for (spin=0; spin<4; spin++){ VC[spin] = (double*)malloc(sizeof(double)*(atomnum+1)); } Voronoi_Vol = (double*)malloc(sizeof(double)*(atomnum+1)); /***************************************************** calculation of Voronoi charge *****************************************************/ #pragma omp parallel shared(S_coordinate,GridVol,VC,Voronoi_Vol,Density_Grid,SpinP_switch,MGridListAtom,atv,CellListAtom,GridListAtom,NumOLG,WhatSpecies,M2G,Matomnum) private(OMPID,Nthrds,Nprocs,Mc_AN,Gc_AN,Cwan,sum0,sum1,vol,tden,tmagx,tmagy,tmagz,Nog,GNc,GRc,Cxyz,x,y,z,FuzzyW,MN,den0,den1,theta,phi,rho,mag,magx,magy,magz,tmag) { /* get info. 
on OpenMP */ OMPID = omp_get_thread_num(); Nthrds = omp_get_num_threads(); Nprocs = omp_get_num_procs(); for (Mc_AN=1+OMPID; Mc_AN<=Matomnum; Mc_AN+=Nthrds){ Gc_AN = M2G[Mc_AN]; Cwan = WhatSpecies[Gc_AN]; sum0 = 0.0; sum1 = 0.0; vol = 0.0; tden = 0.0; tmagx = 0.0; tmagy = 0.0; tmagz = 0.0; for (Nog=0; Nog<NumOLG[Mc_AN][0]; Nog++){ /* calculate fuzzy weight */ GNc = GridListAtom[Mc_AN][Nog]; GRc = CellListAtom[Mc_AN][Nog]; Get_Grid_XYZ(GNc,Cxyz); x = Cxyz[1] + atv[GRc][1]; y = Cxyz[2] + atv[GRc][2]; z = Cxyz[3] + atv[GRc][3]; FuzzyW = Fuzzy_Weight(Gc_AN,Mc_AN,0,x,y,z); /* find charge */ MN = MGridListAtom[Mc_AN][Nog]; if (SpinP_switch<=1){ den0 = Density_Grid[0][MN]; den1 = Density_Grid[1][MN]; /* sum density */ sum0 += den0*FuzzyW; sum1 += den1*FuzzyW; /* sum volume */ vol += FuzzyW; } else{ den0 = Density_Grid[0][MN]; den1 = Density_Grid[1][MN]; theta = Density_Grid[2][MN]; phi = Density_Grid[3][MN]; rho = den0 + den1; mag = den0 - den1; magx = mag*sin(theta)*cos(phi); magy = mag*sin(theta)*sin(phi); magz = mag*cos(theta); /* sum density */ tden += rho*FuzzyW; tmagx += magx*FuzzyW; tmagy += magy*FuzzyW; tmagz += magz*FuzzyW; /* sum volume */ vol += FuzzyW; } } if (SpinP_switch<=1){ VC[0][Gc_AN] = sum0*GridVol; VC[1][Gc_AN] = sum1*GridVol; } else { tmag = sqrt(tmagx*tmagx + tmagy*tmagy + tmagz*tmagz); sum0 = 0.5*(tden + tmag); sum1 = 0.5*(tden - tmag); xyz2spherical( tmagx,tmagy,tmagz, 0.0,0.0,0.0, S_coordinate ); VC[0][Gc_AN] = sum0*GridVol; VC[1][Gc_AN] = sum1*GridVol; VC[2][Gc_AN] = S_coordinate[1]; VC[3][Gc_AN] = S_coordinate[2]; } Voronoi_Vol[Gc_AN] = vol*GridVol*BohrR*BohrR*BohrR; } /* Mc_AN */ } /* #pragma omp parallel */ /***************************************************** MPI VC *****************************************************/ for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){ ID = G2ID[Gc_AN]; MPI_Bcast(&VC[0][Gc_AN], 1, MPI_DOUBLE, ID, mpi_comm_level1); } for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){ ID = G2ID[Gc_AN]; MPI_Bcast(&VC[1][Gc_AN], 1, MPI_DOUBLE, ID, 
mpi_comm_level1); } if (SpinP_switch==3){ for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){ ID = G2ID[Gc_AN]; MPI_Bcast(&VC[2][Gc_AN], 1, MPI_DOUBLE, ID, mpi_comm_level1); } for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){ ID = G2ID[Gc_AN]; MPI_Bcast(&VC[3][Gc_AN], 1, MPI_DOUBLE, ID, mpi_comm_level1); } } for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){ ID = G2ID[Gc_AN]; MPI_Bcast(&Voronoi_Vol[Gc_AN], 1, MPI_DOUBLE, ID, mpi_comm_level1); } VC_S = 0.0; T_VC0 = 0.0; T_VC1 = 0.0; for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){ VC_S += VC[0][Gc_AN] - VC[1][Gc_AN]; T_VC0 += VC[0][Gc_AN]; T_VC1 += VC[1][Gc_AN]; } /**************************************** file, *.VC ****************************************/ if ( myid==Host_ID ){ sprintf(file_VC,"%s%s.VC",filepath,filename); if ((fp_VC = fopen(file_VC,"w")) != NULL){ #ifdef xt3 setvbuf(fp_VC,buf,_IOFBF,fp_bsize); /* setvbuf */ #endif fprintf(fp_VC,"\n"); fprintf(fp_VC,"***********************************************************\n"); fprintf(fp_VC,"***********************************************************\n"); fprintf(fp_VC," Voronoi charges \n"); fprintf(fp_VC,"***********************************************************\n"); fprintf(fp_VC,"***********************************************************\n\n"); fprintf(fp_VC," Sum of Voronoi charges for up = %15.12f\n", T_VC0); fprintf(fp_VC," Sum of Voronoi charges for down = %15.12f\n", T_VC1); fprintf(fp_VC," Sum of Voronoi charges for total = %15.12f\n\n", T_VC0+T_VC1); fprintf(fp_VC," Total spin magnetic moment (muB) by Voronoi charges = %15.12f\n\n",VC_S); if (SpinP_switch<=1){ fprintf(fp_VC," Up spin Down spin Sum Diff Voronoi Volume (Ang.^3)\n"); for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){ fprintf(fp_VC," Atom=%4d %12.9f %12.9f %12.9f %12.9f %12.9f\n", Gc_AN, VC[0][Gc_AN], VC[1][Gc_AN], VC[0][Gc_AN] + VC[1][Gc_AN], VC[0][Gc_AN] - VC[1][Gc_AN], Voronoi_Vol[Gc_AN]); } } else{ fprintf(fp_VC," Up spin Down spin Sum Diff Theta(Deg) Phi(Deg) Voronoi Volume (Ang.^3)\n"); for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){ 
fprintf(fp_VC," Atom=%4d %12.9f %12.9f %12.9f %12.9f %8.4f %8.4f %12.9f\n", Gc_AN, VC[0][Gc_AN], VC[1][Gc_AN], VC[0][Gc_AN] + VC[1][Gc_AN], VC[0][Gc_AN] - VC[1][Gc_AN], VC[2][Gc_AN]/PI*180.0,VC[3][Gc_AN]/PI*180.0, Voronoi_Vol[Gc_AN]); } } fclose(fp_VC); } else{ printf("Failure of saving the VC file.\n"); } } /***************************************************** freeing of array *****************************************************/ for (spin=0; spin<4; spin++){ free(VC[spin]); } free(VC); free(Voronoi_Vol); /* for time */ dtime(&TEtime); time0 = TEtime - TStime; }
//compact B void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { int num_thd = omp_get_num_procs(); omp_set_num_threads(num_thd); SMART_ASSERT(nrhs == 4).Exit(); SMatrix<double> matZ; Vector<SMatrix<double> > vecmatDictionary; int num_dic; mexConvert(MAT_Z, matZ); mexConvert(MAT_DIC, vecmatDictionary); mexConvert(N_SPARSITY, num_dic); SMART_ASSERT(num_dic > 0)(num_dic).Exit(); int num_partitions = vecmatDictionary.size(); mwSize size[2]; size[0] = num_dic * num_partitions; size[1] = matZ.Rows(); //BINARY_REPRESENTATION = mxCreateNumericArray(2, size, mxINT16_CLASS, mxREAL); BINARY_REPRESENTATION = mxCreateNumericArray(2, size, mxUINT8_CLASS, mxREAL); SMatrix<CodeType> matRepresentation; mexConvert(BINARY_REPRESENTATION, matRepresentation); // method: // 2: ock-means described in the paper // others: jck-means IndexEncoding mp; if (num_dic > 1) { SMART_ASSERT(mxIsEmpty(PARAMETER) == false).Exit(); int is_initialize; int num_grouped; mexConvert(mxGetField(PARAMETER, 0, "is_initialize"), is_initialize);; TypeEncoding type_encoding = Type_gk_means; int is_ock = 0; { mxArray* p2 = mxGetField(PARAMETER, 0, "is_ock"); if (p2) { mexConvert(p2, is_ock); if (is_ock) { type_encoding = Type_ock; } } } { mxArray* p2 = mxGetField(PARAMETER, 0, "encoding_type"); if (p2) { string str_encoding_type; mexConvert(p2, str_encoding_type); { if (is_ock) { SMART_ASSERT(str_encoding_type == "ock").Exit(); } } if (str_encoding_type == "gkmeans") { mp.SetEncodingType(Type_gk_means); mexConvert(mxGetField(PARAMETER, 0, "num_grouped"), num_grouped); mp.SetNumberGroup(num_grouped); } else if (str_encoding_type == "ock") { mp.SetEncodingType(Type_ock); int num_can; mexConvert(mxGetField(PARAMETER, 0, "num_can"), num_can); mp.SetCandidateNumber(num_can); } else if (str_encoding_type == "additive_quantization") { mp.SetEncodingType(Type_additive_quantization); int num_can; mexConvert(mxGetField(PARAMETER, 0, "num_can"), num_can); mp.SetCandidateNumber(num_can); } else { 
SMART_ASSERT(0)(str_encoding_type).Exit(); } } } mp.SetIsInitialize(is_initialize); if (!is_initialize) { SMatrix<CodeType> mat_old; mexConvert(mxGetField(PARAMETER, 0, "old_codes"), mat_old); memcpy(matRepresentation.Ptr(), mat_old.Ptr(), sizeof(CodeType) * mat_old.Rows() * mat_old.Cols()); } } mp.Solve(matZ, vecmatDictionary, num_dic, matRepresentation); }
static ERR_VALUE _init_default_values() { ERR_VALUE ret = ERR_INTERNAL_ERROR; ret = option_add_UInt32(PROGRAM_OPTION_KMERSIZE, 5); if (ret == ERR_SUCCESS) ret = option_add_String(PROGRAM_OPTION_SEQFILE, "\0"); if (ret == ERR_SUCCESS) ret = option_add_UInt64(PROGRAM_OPTION_SEQSTART, (uint64_t)-1); if (ret == ERR_SUCCESS) ret = option_add_UInt32(PROGRAM_OPTION_SEQLEN, 100); if (ret == ERR_SUCCESS) ret = option_add_UInt32(PROGRAM_OPTION_TEST_STEP, 1500); if (ret == ERR_SUCCESS) ret = option_add_UInt32(PROGRAM_OPTION_THRESHOLD, 0); if (ret == ERR_SUCCESS) ret = option_add_String(PROGRAM_OPTION_READFILE, "\0"); if (ret == ERR_SUCCESS) ret = option_add_String(PROGRAM_OPTION_OUTPUT_DIRECTORY, "."); if (ret == ERR_SUCCESS) ret = option_add_String(PROGRAM_OPTION_VCFFILE, "\0"); if (ret == ERR_SUCCESS) ret = option_add_Int32(PROGRAM_OPTION_OMP_THREADS, omp_get_num_procs()); if (ret == ERR_SUCCESS) ret = option_add_UInt8(PROGRAM_OPTION_READ_POS_QUALITY, 10); if (ret == ERR_SUCCESS) ret = option_add_Boolean(PROGRAM_OPTION_NO_CONNECT_REFSEQ, FALSE); if (ret == ERR_SUCCESS) ret = option_add_Boolean(PROGRAM_OPTION_NO_CONNECT_READS, FALSE); if (ret == ERR_SUCCESS) ret = option_add_Boolean(PROGRAM_OPTION_NO_BUBBLE_MERGING, FALSE); if (ret == ERR_SUCCESS) ret = option_add_Boolean(PROGRAM_OPTION_NO_LINEAR_SHRINK, FALSE); if (ret == ERR_SUCCESS) ret = option_add_Boolean(PROGRAM_OPTION_NO_HELPER_VERTICES, FALSE); if (ret == ERR_SUCCESS) ret = option_add_UInt32(PROGRAM_OPTION_MISSING_EDGE_PENALTY, 3); if (ret == ERR_SUCCESS) ret = option_add_UInt32(PROGRAM_OPTION_BACKWARD_REFSEQ_PENALTY, 2); if (ret == ERR_SUCCESS) ret = option_add_UInt32(PROGRAM_OPTION_MAX_PATHS, 10); if (ret == ERR_SUCCESS) ret = option_add_UInt32(PROGRAM_OPTION_READ_MAX_ERROR_RATE, 20); option_set_description_const(PROGRAM_OPTION_KMERSIZE, PROGRAM_OPTION_KMERSIZE_DESC); option_set_description_const(PROGRAM_OPTION_SEQFILE, PROGRAM_OPTION_SEQFILE_DESC); option_set_description_const(PROGRAM_OPTION_SEQSTART, 
PROGRAM_OPTION_SEQSTART_DESC); option_set_description_const(PROGRAM_OPTION_SEQLEN, PROGRAM_OPTION_SEQLEN_DESC); option_set_description_const(PROGRAM_OPTION_TEST_STEP, PROGRAM_OPTION_TEST_STEP_DESC); option_set_description_const(PROGRAM_OPTION_THRESHOLD, PROGRAM_OPTION_THRESHOLD_DESC); option_set_description_const(PROGRAM_OPTION_READFILE, PROGRAM_OPTION_READFILE_DESC); option_set_description_const(PROGRAM_OPTION_OUTPUT_DIRECTORY, PROGRAM_OPTION_OUTPUT_DIRECTORY_DESC); option_set_description_const(PROGRAM_OPTION_VCFFILE, PROGRAM_OPTION_VCFFILE_DESC); option_set_shortcut(PROGRAM_OPTION_KMERSIZE, 'k'); option_set_shortcut(PROGRAM_OPTION_SEQFILE, 'f'); option_set_shortcut(PROGRAM_OPTION_SEQSTART, 'S'); option_set_shortcut(PROGRAM_OPTION_SEQLEN, 'l'); option_set_shortcut(PROGRAM_OPTION_TEST_STEP, 'e'); option_set_shortcut(PROGRAM_OPTION_THRESHOLD, 'w'); option_set_shortcut(PROGRAM_OPTION_READFILE, 'F'); option_set_shortcut(PROGRAM_OPTION_OUTPUT_DIRECTORY, 'o'); option_set_shortcut(PROGRAM_OPTION_VCFFILE, 'v'); return ret; }
int main(int argc, char *argv[]) { ERR_VALUE ret = ERR_INTERNAL_ERROR; utils_allocator_init(omp_get_num_procs()); omp_init_lock(&_readCoverageLock); #ifdef _MSC_VER uint64_t startTime = GetTickCount64(); #endif ret = options_module_init(37); if (ret == ERR_SUCCESS) { ret = _init_default_values(); if (ret == ERR_SUCCESS) { ret = options_parse_command_line(argc - 2, argv + 2); if (ret == ERR_SUCCESS) { PROGRAM_OPTIONS po; PROGRAM_STATISTICS st; memset(&st, 0, sizeof(st)); ret = _capture_program_options(&po); if (ret == ERR_SUCCESS) { omp_set_num_threads(po.OMPThreads); const char *cmd = argv[1]; if (strncmp(cmd, "help", sizeof("help")) == 0) { options_print_help(); } else if (strncmp(cmd, "repair", sizeof("repair")) == 0) { size_t refSeqLen = 0; FASTA_FILE seqFile; char *rsFasta = NULL; ret = fasta_load(po.RefSeqFile, &seqFile); if (ret == ERR_SUCCESS) { ret = fasta_read_seq(&seqFile, &rsFasta, &refSeqLen); po.ReferenceSequence = rsFasta; if (ret != ERR_SUCCESS) fasta_free(&seqFile); } if (ret == ERR_SUCCESS) { ret = utils_calloc(omp_get_num_procs(), sizeof(PUTILS_LOOKASIDE), &_vertexLAs); if (ret == ERR_SUCCESS) ret = utils_calloc(omp_get_num_procs(), sizeof(PUTILS_LOOKASIDE), &_edgeLAs); if (ret == ERR_SUCCESS) { ret = utils_calloc(omp_get_num_procs(), sizeof(GEN_ARRAY_ONE_READ), &po.ReadSubArrays); if (ret == ERR_SUCCESS) { const size_t numThreads = omp_get_num_procs(); for (size_t i = 0; i < numThreads; ++i) { dym_array_init_ONE_READ(po.ReadSubArrays + i, 140); _vertexLAs[i] = NULL; _edgeLAs[i] = NULL; } size_t regionCount = 0; PACTIVE_REGION regions = NULL; ret = input_refseq_to_regions(po.ReferenceSequence, refSeqLen, ®ions, ®ionCount); if (ret == ERR_SUCCESS) { const ACTIVE_REGION *pa = NULL; pa = regions; for (size_t i = 0; i < regionCount; ++i) { if (pa->Type == artValid && pa->Length >= po.RegionLength) _activeRegionCount += (pa->Length / po.TestStep); ++pa; } _activeRegionProcessed = 0; pa = regions; for (size_t i = 0; i < regionCount; ++i) { if (pa->Type 
== artValid && pa->Length >= po.RegionLength) repair_reads_in_parallel(pa, &po); ++pa; } input_free_regions(regions, regionCount); } PONE_READ r = po.Reads; for (size_t i = 0; i < po.ReadCount; ++i) { if (r->NumberOfFixes * 100 / r->ReadSequenceLen <= po.ParseOptions.ReadMaxErrorRate) { read_quality_encode(r); read_write_sam(stdout, r); read_quality_decode(r); } ++r; } utils_free(rsFasta); int i = 0; #pragma omp parallel for shared (po) for (i = 0; i < numThreads; ++i) dym_array_finit_ONE_READ(po.ReadSubArrays + i); utils_free(po.ReadSubArrays); } } utils_free(_edgeLAs); utils_free(_vertexLAs); fasta_free(&seqFile); } } else if (strncmp(cmd, "rfreq", sizeof("rfreq")) == 0) { kmer_freq_distribution(&po, po.KMerSize, po.Reads, po.ReadCount); } else if (strncmp(cmd, "call", sizeof("call")) == 0) { fprintf(stderr, "K-mer size: %u\n", po.KMerSize); fprintf(stderr, "Active region length: %u\n", po.RegionLength); fprintf(stderr, "Reference: %s\n", po.RefSeqFile); fprintf(stderr, "Reads: %u\n", po.ReadCount); fprintf(stderr, "Read coverage threshold: %u\n", po.Threshold); fprintf(stderr, "Min. 
read position quality: %u\n", po.ReadPosQuality); fprintf(stderr, "OpenMP thread count: %i\n", po.OMPThreads); fprintf(stderr, "Output VCF file: %s\n", po.VCFFile); ret = paired_reads_init(); if (ret == ERR_SUCCESS) { if (ret == ERR_SUCCESS) { size_t refSeqLen = 0; FASTA_FILE seqFile; char *rsFasta = NULL; ret = fasta_load(po.RefSeqFile, &seqFile); if (ret == ERR_SUCCESS) { ret = fasta_read_seq(&seqFile, &rsFasta, &refSeqLen); po.ReferenceSequence = rsFasta; if (ret != ERR_SUCCESS) fasta_free(&seqFile); } if (ret == ERR_SUCCESS) { po.VCFFileHandle = NULL; if (*po.VCFFile != '\0') { ret = utils_fopen(po.VCFFile, FOPEN_MODE_WRITE, &po.VCFFileHandle); if (ret == ERR_SUCCESS) dym_array_init_VARIANT_CALL(&po.VCArray, 140); } if (ret == ERR_SUCCESS) { ret = utils_calloc(omp_get_num_procs(), sizeof(PUTILS_LOOKASIDE), &_vertexLAs); if (ret == ERR_SUCCESS) ret = utils_calloc(omp_get_num_procs(), sizeof(PUTILS_LOOKASIDE), &_edgeLAs); ret = utils_calloc(omp_get_num_procs(), sizeof(GEN_ARRAY_VARIANT_CALL), &po.VCSubArrays); if (ret == ERR_SUCCESS) { ret = utils_calloc(omp_get_num_procs(), sizeof(GEN_ARRAY_ONE_READ), &po.ReadSubArrays); if (ret == ERR_SUCCESS) { const size_t numThreads = omp_get_num_procs(); for (size_t i = 0; i < numThreads; ++i) { dym_array_init_VARIANT_CALL(po.VCSubArrays + i, 140); dym_array_init_ONE_READ(po.ReadSubArrays + i, 140); _vertexLAs[i] = NULL; _edgeLAs[i] = NULL; } size_t regionCount = 0; PACTIVE_REGION regions = NULL; ret = input_refseq_to_regions(po.ReferenceSequence, refSeqLen, ®ions, ®ionCount); if (ret == ERR_SUCCESS) { const ACTIVE_REGION *pa = NULL; pa = regions; for (size_t i = 0; i < regionCount; ++i) { if (pa->Type == artValid && pa->Length >= po.RegionLength) _activeRegionCount += (pa->Length / po.TestStep); ++pa; } _activeRegionProcessed = 0; pa = regions; for (size_t i = 0; i < regionCount; ++i) { if (pa->Type == artValid && pa->Length >= po.RegionLength) process_active_region_in_parallel(pa, &po); ++pa; } input_free_regions(regions, 
regionCount); } utils_free(rsFasta); ret = vc_array_merge(&po.VCArray, po.VCSubArrays, numThreads); int i = 0; #pragma omp parallel for shared(po) for (i = 0; i <(int) numThreads; ++i) { dym_array_finit_ONE_READ(po.ReadSubArrays + i); vc_array_finit(po.VCSubArrays + i); } utils_free(po.ReadSubArrays); } utils_free(po.VCSubArrays); } utils_free(_edgeLAs); utils_free(_vertexLAs); if (po.VCFFileHandle != NULL) { if (ret == ERR_SUCCESS) vc_array_print(po.VCFFileHandle, &po.VCArray); vc_array_finit(&po.VCArray); utils_fclose(po.VCFFileHandle); } } fasta_free(&seqFile); } } else printf("fix_reads(): %u\n", ret); printf("Read coverage: %lf\n", _readBaseCount / _totalRegionLength ); paired_reads_finit(); } } } } } options_module_finit(); } #ifdef _MSC_VER uint64_t endTime = GetTickCount64(); fprintf(stderr, "Time: %I64u s\n", (endTime - startTime) / 1000); #endif omp_destroy_lock(&_readCoverageLock); return ret; }
/*
 * Linear solver test driver.
 *
 * Usage: <program> matrix_filename rhs_setting solution_filename rhistory_filename [options]
 *   rhs_setting "0": use the right-hand side stored in the matrix file
 *                    (aborts when the file has none)
 *               "1": b = (1, ..., 1)
 *               "2": b = A * (1, ..., 1)
 *               anything else: treated as a file name and read into b
 *
 * Solves A x = b, prints iteration/timing/residual statistics on rank 0 and
 * writes the solution (argv[3]) and the residual history (argv[4]).
 */
LIS_INT main(LIS_INT argc, char* argv[])
{
	LIS_MATRIX A0,A;
	LIS_VECTOR x,b,u;
	LIS_SOLVER solver;
	LIS_INT nprocs,my_rank;
	int int_nprocs,int_my_rank;
	LIS_INT nsol,rhs,len;
	LIS_INT err,iter,iter_double,iter_quad;
	double time,itime,ptime,p_c_time,p_i_time;
	LIS_REAL resid;
	char solvername[128];

	LIS_DEBUG_FUNC_IN;

	lis_initialize(&argc, &argv);

#ifdef USE_MPI
	MPI_Comm_size(MPI_COMM_WORLD,&int_nprocs);
	MPI_Comm_rank(MPI_COMM_WORLD,&int_my_rank);
	nprocs = int_nprocs;
	my_rank = int_my_rank;
#else
	nprocs  = 1;
	my_rank = 0;
#endif

	if( argc < 5 )
	{
		if( my_rank==0 )
		{
			printf("Usage: %s matrix_filename rhs_setting solution_filename rhistory_filename [options]\n", argv[0]);
		}
		CHKERR(1);
	}

	/* A single character '0', '1' or '2' selects a built-in right-hand side;
	   everything else means "read b from the file named by argv[2]". */
	len = (LIS_INT)strlen(argv[2]);
	if( len==1 )
	{
		if( argv[2][0]=='0' || argv[2][0]=='1' || argv[2][0]=='2' )
		{
			rhs = atoi(argv[2]);
		}
		else
		{
			rhs = -1;
		}
	}
	else
	{
		rhs = -1;
	}

	if( my_rank==0 )
	{
		printf("\n");
#ifdef _LONG__LONG
		printf("number of processes = %lld\n",nprocs);
#else
		printf("number of processes = %d\n",nprocs);
#endif
	}

#ifdef _OPENMP
	if( my_rank==0 )
	{
		/* The OpenMP query functions return int whatever the width of
		   LIS_INT, so %d is the matching conversion in every build. */
		printf("max number of threads = %d\n",omp_get_num_procs());
		printf("number of threads = %d\n",omp_get_max_threads());
	}
#endif

	/* read matrix and vectors from file */
	err = lis_matrix_create(LIS_COMM_WORLD,&A); CHKERR(err);
	err = lis_vector_create(LIS_COMM_WORLD,&b); CHKERR(err);
	err = lis_vector_create(LIS_COMM_WORLD,&x); CHKERR(err);
	err = lis_input(A,b,x,argv[1]);
	CHKERR(err);

	/* convert the matrix to CSR storage */
	err = lis_matrix_duplicate(A,&A0);
	CHKERR(err);
	lis_matrix_set_type(A0,LIS_MATRIX_CSR);
	err = lis_matrix_convert(A,A0);
	CHKERR(err);
	lis_matrix_destroy(A);
	A = A0;

	err = lis_vector_duplicate(A,&u);
	CHKERR(err);
	if( lis_vector_is_null(b) )
	{
		/* the input file carried no right-hand side: build one */
		lis_vector_destroy(b);
		err = lis_vector_duplicate(A,&b);
		CHKERR(err);
		if( rhs==0 )
		{
			CHKERR(1);
		}
		else if( rhs==1 )
		{
			err = lis_vector_set_all(1.0,b);
			CHKERR(err);
		}
		else
		{
			err = lis_vector_set_all(1.0,u);
			CHKERR(err);
			lis_matvec(A,u,b);
		}
	}
	if( rhs==-1 )
	{
		lis_input_vector(b,argv[2]);
	}
	if( lis_vector_is_null(x) )
	{
		lis_vector_destroy(x);
		err = lis_vector_duplicate(A,&x);
		CHKERR(err);
	}

	err = lis_solver_create(&solver); CHKERR(err);
	lis_solver_set_option("-print mem",solver);
	lis_solver_set_optionC(solver);

	err = lis_solve(A,b,x,solver);
	CHKERR(err);

	lis_solver_get_iterex(solver,&iter,&iter_double,&iter_quad);
	lis_solver_get_timeex(solver,&time,&itime,&ptime,&p_c_time,&p_i_time);
	lis_solver_get_residualnorm(solver,&resid);
	lis_solver_get_solver(solver,&nsol);
	lis_solver_get_solvername(nsol,solvername);

	/* write results */
	if( my_rank==0 )
	{
#ifdef _LONG__LONG
#ifdef _LONG__DOUBLE
		printf("%s: number of iterations = %lld \n",solvername, iter);
#else
		printf("%s: number of iterations = %lld (double = %lld, quad = %lld)\n",solvername,iter, iter_double, iter_quad);
#endif
#else
#ifdef _LONG__DOUBLE
		printf("%s: number of iterations = %d \n",solvername, iter);
#else
		printf("%s: number of iterations = %d (double = %d, quad = %d)\n",solvername,iter, iter_double, iter_quad);
#endif
#endif
		printf("%s: elapsed time = %e sec.\n",solvername,time);
		printf("%s: preconditioner = %e sec.\n",solvername, ptime);
		printf("%s: matrix creation = %e sec.\n",solvername, p_c_time);
		printf("%s: linear solver = %e sec.\n",solvername, itime);
#ifdef _LONG__DOUBLE
		printf("%s: relative residual = %Le\n\n",solvername,resid);
#else
		printf("%s: relative residual = %e\n\n",solvername,resid);
#endif
	}

	/* write solution */
	lis_output_vector(x,LIS_FMT_MM,argv[3]);

	/* write residual history */
	lis_solver_output_rhistory(solver, argv[4]);

	lis_solver_destroy(solver);
	lis_vector_destroy(x);
	lis_vector_destroy(u);
	lis_vector_destroy(b);
	lis_matrix_destroy(A);

	lis_finalize();

	LIS_DEBUG_FUNC_OUT;

	return 0;
}
// ============================================================ int main( const int nArg, const char *aArg[] ) { // BEGIN OMP gnThreadsMaximum = omp_get_num_procs(); // END OMP int iArg = 1; for( iArg = 1; iArg < nArg; iArg++ ) { if (aArg[ iArg ][0] == '-' ) { if (aArg[iArg][1] == 'j') { iArg++; if (iArg > nArg) return printf( "Invalid # of threads to use.\n" ); gnThreadsActive = atoi( aArg[ iArg ] ); if (gnThreadsActive < 0) gnThreadsActive = 0; if (gnThreadsActive > gnThreadsMaximum) gnThreadsActive = gnThreadsMaximum; } } else break; } prime_t max = (nArg > iArg) ? (prime_t) atou( aArg[ iArg ] ) // : 6; // Test 6i+1>max && isprime(6i+1)==true // : 32; // Test 8 core // : 64; // Test 8 core // : 255; // 2^8 Test 8 core // : 256; //10^3 Test 8 core [54] = 251 // Largest 8-bit prime // : 100; //10^2 [ 25] = 97 // 25 primes between 1 and 100 // : 1000; //10^3 [ 168] = 997 // 10000; //10^4 [ 1,229] = 9,973 // // : 65536; // 2^16 [ 6,542] = 65,521 // x86: 00:00:00.001, x64: 00:00:00.000 Primes/Sec: 64,000,000 K#/s Largest 16-bit prime // : 100000; //10^5 [ 9,592] = 99,991 // x86: 00:00:00.001, x64: 00:00:00.000 Primes/Sec: 97,000,000 K#/s // : 611953; // [ 50,000] = 611,953 // x86: 00:00:00.002, x64: 00:00:00.002 Primes/Sec: 298,500 K#/s First 50,000 primes // : 1000000; //10^6 [ 78,498] = 999,983 // x86: 00:00:00.003, x64: 00:00:00.002 Primes/Sec: 488,000 K#/s : 10000000; //10^7 [ 664,579] = 9,999,991 // x86: 00:00:00.031, x64: 00:00:00.034 Primes/Sec: 264 M#/s // : 15485863; // [ 1,000,000] = 15,485,863 // x86: 00:00:00.057, x64: 00:00:00.055 Primes/Sec: 254 M#/s First 1,000,000 primes // : 100000000; //10^8 [ 5,761,455] = 99,999,989 // x86: 00:00:00.490, x64: 00:00:00.484 Primes/Sec: 196 M#/s // : 1000000000; //10^9 [ 50,847,534] = 999,999,937 // x86: crash x64: 00:00:10.590 Primes/Sec: 89 M#/s // : 2038074743; // [ 100,000,000] = 2,038,074,743 // x64: 00:00:23.130 Primes/Sec: 84 M#/s First 100,000,000 primes // : 2147483644; // 2^31-4 [ 105,097,564] = 
2,147,483,629 // x64: 00:00:24.502 Primes/Sec: 83 M#/s // : 2147483647; // 2^31-1 [ ] // : 2147483648; // 2^31 [ 105,097,565] = 2,147,483,647 // x64: 00:00:43.818 Primes/Sec: 46 M#/s // : 4294967292; // 2^32-4 [ ] // : 4294967295; // 2^32-1 [ ] // : 4294967296; // 2^32 [ 203,280,221] = // :10000000000; //10^10 [ 455,052,511] = // : 1e11; //10^11 [ 4,118,054,813] = // : 1e12; //10^12 [ 37,607,912,018] = // : 1e13; //10^13 [346,065,536,839] = AllocArray ( max ); TimerStart ( max ); BuildPrimes( max ); TimerStop ( max ); getchar(); PrintPrimes(); DeleteArray(); return 0; }
/*
 * Eigensolver test driver for the one-dimensional Poisson matrix.
 *
 * Usage: <program> n [eoptions]
 *   n  global matrix dimension
 *
 * Builds the tridiagonal (-1, 2, -1) matrix, runs the eigensolver configured
 * through the command-line options and prints the result on rank 0.
 */
LIS_INT main(LIS_INT argc, char* argv[])
{
	LIS_INT i,n,gn,is,ie;
	LIS_INT nprocs,my_rank;
	int int_nprocs,int_my_rank;
	LIS_INT nesol;
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_REAL evalue0;
	LIS_ESOLVER esolver;
	LIS_REAL residual;
	LIS_INT iter;
	double time;
	double itime,ptime,p_c_time,p_i_time;
	char esolvername[128];

	LIS_DEBUG_FUNC_IN;

	lis_initialize(&argc, &argv);

#ifdef USE_MPI
	MPI_Comm_size(MPI_COMM_WORLD,&int_nprocs);
	MPI_Comm_rank(MPI_COMM_WORLD,&int_my_rank);
	nprocs = int_nprocs;
	my_rank = int_my_rank;
#else
	nprocs  = 1;
	my_rank = 0;
#endif

	if( argc < 2 )
	{
		if( my_rank==0 )
		{
			printf("Usage: %s n [eoptions]\n", argv[0]);
		}
		CHKERR(1);
	}

	if( my_rank==0 )
	{
		printf("\n");
		/* nprocs is a LIS_INT: pick the conversion that matches its width */
#ifdef _LONG__LONG
		printf("number of processes = %lld\n",nprocs);
#else
		printf("number of processes = %d\n",nprocs);
#endif
	}

#ifdef _OPENMP
	if( my_rank==0 )
	{
		/* The OpenMP query functions return int whatever the width of
		   LIS_INT, so %d is the matching conversion in every build. */
		printf("max number of threads = %d\n",omp_get_num_procs());
		printf("number of threads = %d\n",omp_get_max_threads());
	}
#endif

	/* generate coefficient matrix for one dimensional Poisson equation */
	n = atoi(argv[1]);
	lis_matrix_create(LIS_COMM_WORLD,&A);
	lis_matrix_set_size(A,0,n);
	lis_matrix_get_size(A,&n,&gn);
	lis_matrix_get_range(A,&is,&ie);
	for(i=is;i<ie;i++)
	{
		if( i>0 )    lis_matrix_set_value(LIS_INS_VALUE,i,i-1,-1.0,A);
		if( i<gn-1 ) lis_matrix_set_value(LIS_INS_VALUE,i,i+1,-1.0,A);
		lis_matrix_set_value(LIS_INS_VALUE,i,i,2.0,A);
	}
	lis_matrix_set_type(A,LIS_MATRIX_CSR);
	lis_matrix_assemble(A);
	lis_vector_duplicate(A,&x);

	lis_esolver_create(&esolver);
	lis_esolver_set_option("-eprint mem",esolver);
	lis_esolver_set_optionC(esolver);
	lis_esolve(A, x, &evalue0, esolver);
	lis_esolver_get_esolver(esolver,&nesol);
	lis_esolver_get_esolvername(nesol,esolvername);
	lis_esolver_get_residualnorm(esolver, &residual);
	lis_esolver_get_iter(esolver, &iter);
	lis_esolver_get_timeex(esolver,&time,&itime,&ptime,&p_c_time,&p_i_time);

	if( my_rank==0 )
	{
		printf("%s: mode number = %d\n", esolvername, 0);
#ifdef _LONG__DOUBLE
		printf("%s: eigenvalue = %Le\n", esolvername, evalue0);
#else
		printf("%s: eigenvalue = %e\n", esolvername, evalue0);
#endif
#ifdef _LONG__LONG
		printf("%s: number of iterations = %lld\n",esolvername, iter);
#else
		printf("%s: number of iterations = %d\n",esolvername, iter);
#endif
		printf("%s: elapsed time = %e sec.\n", esolvername, time);
		printf("%s: preconditioner = %e sec.\n", esolvername, ptime);
		printf("%s: matrix creation = %e sec.\n", esolvername, p_c_time);
		printf("%s: linear solver = %e sec.\n", esolvername, itime);
#ifdef _LONG__DOUBLE
		printf("%s: relative residual = %Le\n\n",esolvername, residual);
#else
		printf("%s: relative residual = %e\n\n",esolvername, residual);
#endif
	}

	/*
	lis_vector_nrm2(x, &xnrm2);
	lis_vector_scale((1/xnrm2*sqrt(n)), x);
	lis_vector_print(x);
	*/

	/*
	lis_vector_create(LIS_COMM_WORLD,&y);
	lis_matrix_create(LIS_COMM_WORLD,&B);
	lis_esolver_get_evalues(esolver,y);
	lis_esolver_get_evectors(esolver,B);
	lis_output_vector(y,LIS_FMT_MM,"evalues.out");
	lis_output_matrix(B,LIS_FMT_MM,"evectors.out");
	lis_vector_destroy(y);
	lis_matrix_destroy(B);
	*/

	lis_esolver_destroy(esolver);
	lis_matrix_destroy(A);
	lis_vector_destroy(x);

	lis_finalize();

	LIS_DEBUG_FUNC_OUT;

	return 0;
}
// using namespace std; int _tmain(int argc, _TCHAR* argv[]) { wcout << L"StatisGA:" << endl; // setlocale( LC_ALL, "chs" ); // wstring test = L"ищ"; // wcout << test<<endl; wstring InitialParaFile = L"Para.txt"; wstring OutpuParaFile = L"Para.txt"; bool bMode = false; wcout << L"Choose mode: 0 for Train(default); 1 for test" << endl; wcin >> bMode; NumCharact NCharact; wstring InputDataFile = L"399300"; //if (true == bMode) //{ // wcout << L"Input data file name:"; // wcin >> InputDataFile; //} //wchar_t CurrentPath[1024]; //GetCurrentDirectory(1024, CurrentPath); vector<vector<double>> OriginalDataArray; vector<wstring> DateArray; ReadDataFile(DateArray, OriginalDataArray, InputDataFile); int DataStart = 150; double StartValue = 0; if (true == bMode) { DataStart = 0; // wcout << L"Input Start value:"; // wcin >> StartValue; } int CyclePara = 1; NCharact.DataPreprocess(CyclePara, DataStart, StartValue, OriginalDataArray); NCharact.TrainFactorLength = 30; NCharact.FactorNumber = 14; NCharact.CheckSize = 2; const int CalcuNumberK = 6; NCharact.FactorRange = 10901; const int TolerantRange = 250; const int TolerantNumber = 2; __int64 GANumber = 1000100000;// wstringstream FileNameStream; FileNameStream << InputDataFile << L"_" << NCharact.FactorNumber << L"_" << TolerantRange << L"_" << TolerantNumber << CyclePara << L".txt";//<< L"750.txt";// InitialParaFile = FileNameStream.str(); OutpuParaFile = FileNameStream.str(); FileNameStream.clear(); NCharact.InitTrainArray(CalcuNumberK, TolerantRange, TolerantNumber, InitialParaFile); if (false == bMode) { int NThreads = omp_get_num_procs() - 1; int NOut = 100000; unsigned __int64 nn = 0; vector<vector<int>> SingleTrainArray; #ifndef _DEBUG #pragma omp parallel for num_threads(NThreads) firstprivate(SingleTrainArray) //, TargetPointResultArray) #endif for (__int64 n = __int64(GANumber * 0.0); n < GANumber; n++) { double NPercent = double(nn) / GANumber; int TrainFactorIndex = n % NCharact.TrainFactorLength; 
NCharact.GAFunction(NPercent, TrainFactorIndex, SingleTrainArray, CalcuNumberK); vector<double> JudgeElement(2, 0.0); NCharact.ANOVA2(CalcuNumberK, TolerantRange, TolerantNumber, SingleTrainArray, JudgeElement); #pragma omp critical { NCharact.GAReprocess(NPercent, TrainFactorIndex, SingleTrainArray, JudgeElement); if(0 == nn % NOut) { wcout << nn << L" "; NCharact.OutputParaFile(OutpuParaFile); } nn++; } } } else { NCharact.TestLatestData(DateArray, TolerantRange, CalcuNumberK, 450); } /* const int HistogOxLength = 20; const int HistogDataLength = 20; const int HistogSize = 4; NCharact.HistogCoOx.insert(NCharact.HistogCoOx.end(), HistogOxLength, 0.0); vector<int> HistogTemp(HistogDataLength, 0); NCharact.HistogData.insert(NCharact.HistogData.end(), HistogSize, HistogTemp); NCharact.GenerateHistogDistr(CheckSize, TargetPointResultArray); */ OriginalDataArray.clear(); DateArray.clear(); NCharact.DataClear(); string tempstring; cout << endl << "end."; cin >> tempstring; return 0; }
int sapporo::open(std::string kernelFile, int *devices, int nprocs, int order, int precision) { //Set the integration order integrationOrder = order; integrationPrecision = precision; cout << "Integration order used: " << integrationOrder << " (0=GRAPE5, 1=4th, 2=6th, 3=8th)\n"; cout << "Integration precision used: " << precision << " (0=FLOAT, 1 = DOUBLESINGLE, 2=DOUBLE)\n"; dev::context contextTest; //Only used to retrieve the number of devices int numDev = 0; #ifdef __OPENCL_DEV__ numDev = contextTest.getDeviceCount(CL_DEVICE_TYPE_GPU, 0); #else numDev = contextTest.getDeviceCount(); #endif cout << "Number of cpus available: " << omp_get_num_procs() << endl; cout << "Number of gpus available: " << numDev << endl; // create as many CPU threads as there are CUDA devices and create the contexts int numThread = abs(nprocs); if(numThread == 0) //Use as many as available { numThread = numDev; } deviceList = new sapporo2::device*[numThread]; numberOfGPUUsedBySapporo = numThread; // omp_set_num_threads(numThread); #pragma omp parallel num_threads(numberOfGPUUsedBySapporo) { //Create context for each thread unsigned int tid = omp_get_thread_num(); sapdevice = new sapporo2::device(); deviceList[tid] = sapdevice; //Let the driver try to get a device if nprocs < 0 //Use 1...N if nprocs == 0 //Use list if nprocs > 0 int dev = -1; if(nprocs == 0) //Device ID is thread ID { dev = tid; } else if(nprocs > 0) { dev = devices[tid]; //The user gave us a set of device ids } //Assign the device and load the kernels sapdevice->assignDevice(dev, integrationOrder); const char *gravityKernel = get_kernelName(integrationOrder, precision,sapdevice->sharedMemPerThread); sapdevice->loadComputeKernels(kernelFile.c_str(), gravityKernel); if(tid == 0) { nCUDAdevices = omp_get_num_threads(); } //Allocate initial memory for 16k particles per device sapdevice->allocateMemory(16384, get_n_pipes()); nj_max = 16384; }//end pragma omp parallel //Used to store j-memory particle counters 
jCopyInformation.resize(nCUDAdevices); CPUThreshold = -1; //By Default GPU is always used #if 0 #ifdef CPU_SUPPORT const int nMaxTest = 2049; const int nMaxLoop = 2049; const int nIncrease = 16; //At the start of the program figure out at which point the GPU will be faster //than the host CPU. This can either be based on ni, nj, or on a combination //of ni*nj = #interactions. Then if #interactions < GPUOptimal do host compute //otherwise do GPU compute. Stored in CPUThreshold //#pragma omp parallel { //First fill the ids with valid info otherwise testing might fail, if all ids are 0 for(int i=0; i < nMaxTest; i++) { if(i < NPIPES) { sapdevice->id_i[i] = i; sapdevice->pos_i[i].x = (1.0 - 2.0*drand48()); sapdevice->pos_i[i].y = (1.0 - 2.0*drand48()); sapdevice->pos_i[i].z = (1.0 - 2.0*drand48()); sapdevice->pos_i[i].w = 1./1024; sapdevice->vel_i[i].x = drand48() * 0.1; sapdevice->vel_i[i].y = drand48() * 0.1; sapdevice->vel_i[i].z = drand48() * 0.1; } if(i < nj_max) { sapdevice->id_j[i] = i; sapdevice->pPos_j[i].x = (1.0 - 2.0*drand48()); sapdevice->pPos_j[i].y = (1.0 - 2.0*drand48()); sapdevice->pPos_j[i].z = (1.0 - 2.0*drand48()); sapdevice->pPos_j[i].w = 1./1024; sapdevice->pVel_j[i].x = drand48() * 0.1; sapdevice->pVel_j[i].y = drand48() * 0.1; sapdevice->pVel_j[i].z = drand48() * 0.1; } } //Some temp buffers, are being used multiple //times and contain only bogus data double (*pos)[3] = new double[nMaxTest][3]; double (*vel)[3] = new double[nMaxTest][3]; double (*acc)[3] = new double[nMaxTest][3]; double (*jrk)[3] = new double[nMaxTest][3]; double *tempBuff = new double[nMaxTest]; double *timingMatrixGPU = new double[nMaxTest*nMaxTest]; double *timingMatrixCPU = new double[nMaxTest*nMaxTest]; //First call to initialize device evaluate_gravity(1, 1); retrieve_i_particle_results(1); double tTime = 0; CPUThreshold = -1; //Negative to force GPU timings for(int k=0; k < nMaxLoop; k+=nIncrease) //number of i-particles { for(int m=0; m < nMaxLoop; m+=nIncrease) 
//number of j-particles { int kk=k, mm=m; if(k==0) kk = 1; if(m==0) mm = 1; timingMatrixGPU[m*nMaxTest+k] = 0; for(int n=0; n < 10; n++) { double t0 = get_time(); set_time(tTime);//set time startGravCalc(mm,kk, &sapdevice->id_i[0], pos, vel,acc, acc,tempBuff, 1./ nMaxTest, tempBuff, NULL); getGravResults(mm,kk, &sapdevice->id_i[0], pos, vel, 1./ nMaxTest, NULL, acc, jrk, acc, jrk, tempBuff, NULL, tempBuff, false); // fprintf(stderr, "TEST DEV: Took: nj: %d ni: %d \t %g\n", m, k, get_time() - t0); timingMatrixGPU[m*nMaxTest+k] += get_time() - t0; tTime += 0.0001; }//for n }//for m }//for k #if 0 CPUThreshold = 10e10; //Huge to force CPU timings //First call outside loop, to boot-up openMP evaluate_gravity_host(1, 1); evaluate_gravity_host_vector(1, 1); tTime = 0; for(int k=1; k < nMaxLoop; k+=nIncrease) { for(int m=1; m < nMaxLoop; m+=nIncrease) { timingMatrixCPU[m*nMaxTest+k] = 0; for(int n=0; n < 10; n++) { int kk=k, mm=m; if(k==0) kk = 1; if(m==0) mm = 1; double t0 = get_time(); set_time(tTime);//set time startGravCalc(mm,kk, &sapdevice->id_i[0], pos, vel,acc, acc,tempBuff, 1./ nMaxTest, tempBuff, NULL); getGravResults(mm,kk, &sapdevice->id_i[0], pos, vel, 1./ nMaxTest, NULL, acc, jrk, acc, jrk, tempBuff, NULL, tempBuff, false); // fprintf(stderr, "TEST CPU: Took: nj: %d ni: %d \t %g\n", m, k, get_time() - t0); timingMatrixCPU[m*nMaxTest+k] += get_time() - t0; tTime += 0.0001; }//for n }//for m } //for k #endif //Write timing data to file FILE *foutT = fopen("data.txt","w"); //Print timing results GPU fprintf(stderr, "GPU timings:\nni"); fprintf(foutT, "ni"); for(int i=1; i < nMaxLoop; i+=nIncrease) fprintf(foutT, "\t%d", i); fprintf(foutT, "\n"); fprintf(foutT, "nj\n"); for(int j=1; j < nMaxLoop; j+=nIncrease) { fprintf(foutT, "%d\t", j); for(int i=1; i < nMaxLoop; i+=nIncrease) { fprintf(foutT, "%f\t", timingMatrixGPU[j*nMaxTest+i]); } fprintf(foutT, "\n"); } fclose(foutT); exit(0); fprintf(stderr, "\nCPU timings:\nni"); for(int i=1; i < nMaxLoop; i+=nIncrease) 
fprintf(stderr, "\t%d", i); fprintf(stderr, "\n"); fprintf(stderr, "nj\n"); for(int j=1; j < nMaxLoop; j+=nIncrease) { fprintf(stderr, "%d\t", j); for(int i=1; i < nMaxLoop; i+=nIncrease) { fprintf(stderr, "%f\t", timingMatrixCPU[j*nMaxTest+i]); } fprintf(stderr, "\n"); } fprintf(stderr, "GPU timings:\n"); for(int j=1; j < nMaxLoop; j+=nIncrease) { for(int i=1; i < nMaxLoop; i+=nIncrease) { // fprintf(stderr,"%f\t%f\t%f\t%f\n", fprintf(stderr,"%d\t%f\t%f\n", i*j, timingMatrixGPU[j*nMaxTest+i], timingMatrixCPU[j*nMaxTest+i]); // j / timingMatrixGPU[j*nMaxTest+i], // i / timingMatrixGPU[j*nMaxTest+i], // j / timingMatrixCPU[j*nMaxTest+i], // i / timingMatrixCPU[j*nMaxTest+i]); }//for i fprintf(stderr, "\n"); } //for j //TODO set some interaction count number that is the break-even point //between CPU and GPU computations delete[] pos; delete[] vel; delete[] acc; delete[] jrk; delete[] tempBuff; delete[] timingMatrixGPU; delete[] timingMatrixCPU; } exit(0); #endif //ifdef CPU support #endif return 0; }
Inputs read_CLI( int argc, char * argv[] ) { Inputs input; memset(&input, 0, sizeof(Inputs)); // defaults to max threads on the system input.nthreads = omp_get_num_procs(); // defaults to 355 (corresponding to H-M Large benchmark) input.n_isotopes = 355; // defaults to 11303 (corresponding to H-M Large benchmark) input.n_gridpoints = 11303; // defaults to 15,000,000 input.lookups = 15000000; // defaults to H-M Large benchmark input.HM = (char *) malloc( 6 * sizeof(char) ); strcpy(input.HM, "small"); /* input.HM[0] = 'l' ; input.HM[1] = 'a' ; input.HM[2] = 'r' ; input.HM[3] = 'g' ; input.HM[4] = 'e' ; input.HM[5] = '\0'; */ #ifdef __USE_AMD_OCL__ input.tloops = 1; input.run_cpu = false; #endif input.savegrids = false; input.restoregrids = false; strcpy(input.file_name, "grids"); // Check if user sets these int user_g = 0; // Collect Raw Input for( int i = 1; i < argc; i++ ) { char * arg = argv[i]; // nthreads (-t) if( strcmp(arg, "-t") == 0 ) { if( ++i < argc ) input.nthreads = atoi(argv[i]); else print_CLI_error(); } // n_gridpoints (-g) else if( strcmp(arg, "-g") == 0 ) { if( ++i < argc ) { user_g = 1; input.n_gridpoints = atoi(argv[i]); } else print_CLI_error(); } // lookups (-l) else if( strcmp(arg, "-l") == 0 ) { if( ++i < argc ) input.lookups = atoi(argv[i]); else print_CLI_error(); } // HM (-s) else if( strcmp(arg, "-s") == 0 ) { if( ++i < argc ) input.HM = argv[i]; else print_CLI_error(); } #ifdef __USE_AMD_OCL__ else if( strcmp(arg, "-k") == 0 ) { if( ++i < argc ) input.tloops = atoi(argv[i]); else print_CLI_error(); } else if( strcmp(arg, "-c") == 0 ) { input.run_cpu = true; } #endif else if( strcmp(arg, "-v") == 0 ) { input.savegrids = true; } else if( strcmp(arg, "-r") == 0 ) { input.restoregrids = true; } else if( strcmp(arg, "-f") == 0 ) { if( ++i < argc ) strcpy(input.file_name,argv[i]); else print_CLI_error(); } else print_CLI_error(); } if(input.savegrids) input.restoregrids = false; // Validate Input // Validate nthreads if( input.nthreads < 1 ) 
print_CLI_error(); // Validate n_isotopes if( input.n_isotopes < 1 ) print_CLI_error(); // Validate n_gridpoints if( input.n_gridpoints < 1 ) print_CLI_error(); // Validate lookups if( input.lookups < 1 ) print_CLI_error(); // Validate HM size if( strcasecmp(input.HM, "small") != 0 && strcasecmp(input.HM, "large") != 0 && strcasecmp(input.HM, "XL") != 0 && strcasecmp(input.HM, "XXL") != 0 ) print_CLI_error(); // Set HM size specific parameters // (defaults to large) if( strcasecmp(input.HM, "small") == 0 ) input.n_isotopes = 68; else if( strcasecmp(input.HM, "XL") == 0 && user_g == 0 ) input.n_gridpoints = 238847; // sized to make 120 GB XS data else if( strcasecmp(input.HM, "XXL") == 0 && user_g == 0 ) input.n_gridpoints = 238847 * 2.1; // 252 GB XS data // Return input struct return input; }
int dt_init(int argc, char *argv[], const gboolean init_gui, const gboolean load_data, lua_State *L) { double start_wtime = dt_get_wtime(); #ifndef __WIN32__ if(getuid() == 0 || geteuid() == 0) printf( "WARNING: either your user id or the effective user id are 0. are you running darktable as root?\n"); #endif #if defined(__SSE__) // make everything go a lot faster. _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); #endif dt_set_signal_handlers(); #include "is_supported_platform.h" int sse2_supported = 0; #ifdef HAVE_BUILTIN_CPU_SUPPORTS // NOTE: _may_i_use_cpu_feature() looks better, but only avaliable in ICC __builtin_cpu_init(); sse2_supported = __builtin_cpu_supports("sse2"); #else sse2_supported = dt_detect_cpu_features() & CPU_FLAG_SSE2; #endif if(!sse2_supported) { fprintf(stderr, "[dt_init] SSE2 instruction set is unavailable.\n"); fprintf(stderr, "[dt_init] expect a LOT of functionality to be broken. you have been warned.\n"); } #ifdef M_MMAP_THRESHOLD mallopt(M_MMAP_THRESHOLD, 128 * 1024); /* use mmap() for large allocations */ #endif // make sure that stack/frame limits are good (musl) dt_set_rlimits(); // we have to have our share dir in XDG_DATA_DIRS, // otherwise GTK+ won't find our logo for the about screen (and maybe other things) { const gchar *xdg_data_dirs = g_getenv("XDG_DATA_DIRS"); gchar *new_xdg_data_dirs = NULL; gboolean set_env = TRUE; if(xdg_data_dirs != NULL && *xdg_data_dirs != '\0') { // check if DARKTABLE_SHAREDIR is already in there gboolean found = FALSE; gchar **tokens = g_strsplit(xdg_data_dirs, G_SEARCHPATH_SEPARATOR_S, 0); // xdg_data_dirs is neither NULL nor empty => tokens != NULL for(char **iter = tokens; *iter != NULL; iter++) if(!strcmp(DARKTABLE_SHAREDIR, *iter)) { found = TRUE; break; } g_strfreev(tokens); if(found) set_env = FALSE; else new_xdg_data_dirs = g_strjoin(G_SEARCHPATH_SEPARATOR_S, DARKTABLE_SHAREDIR, xdg_data_dirs, NULL); } else { #ifndef _WIN32 // see http://standards.freedesktop.org/basedir-spec/latest/ar01s03.html 
for a reason to use those as a // default if(!g_strcmp0(DARKTABLE_SHAREDIR, "/usr/local/share") || !g_strcmp0(DARKTABLE_SHAREDIR, "/usr/local/share/") || !g_strcmp0(DARKTABLE_SHAREDIR, "/usr/share") || !g_strcmp0(DARKTABLE_SHAREDIR, "/usr/share/")) new_xdg_data_dirs = g_strdup("/usr/local/share/" G_SEARCHPATH_SEPARATOR_S "/usr/share/"); else new_xdg_data_dirs = g_strdup_printf("%s" G_SEARCHPATH_SEPARATOR_S "/usr/local/share/" G_SEARCHPATH_SEPARATOR_S "/usr/share/", DARKTABLE_SHAREDIR); #else set_env = FALSE; #endif } if(set_env) g_setenv("XDG_DATA_DIRS", new_xdg_data_dirs, 1); g_free(new_xdg_data_dirs); } setlocale(LC_ALL, ""); bindtextdomain(GETTEXT_PACKAGE, DARKTABLE_LOCALEDIR); bind_textdomain_codeset(GETTEXT_PACKAGE, "UTF-8"); textdomain(GETTEXT_PACKAGE); // init all pointers to 0: memset(&darktable, 0, sizeof(darktable_t)); darktable.start_wtime = start_wtime; darktable.progname = argv[0]; // FIXME: move there into dt_database_t dt_pthread_mutex_init(&(darktable.db_insert), NULL); dt_pthread_mutex_init(&(darktable.plugin_threadsafe), NULL); dt_pthread_mutex_init(&(darktable.capabilities_threadsafe), NULL); darktable.control = (dt_control_t *)calloc(1, sizeof(dt_control_t)); // database char *dbfilename_from_command = NULL; char *noiseprofiles_from_command = NULL; char *datadir_from_command = NULL; char *moduledir_from_command = NULL; char *tmpdir_from_command = NULL; char *configdir_from_command = NULL; char *cachedir_from_command = NULL; #ifdef HAVE_OPENCL gboolean exclude_opencl = FALSE; gboolean print_statistics = strcmp(argv[0], "darktable-cltest"); #endif #ifdef USE_LUA char *lua_command = NULL; #endif darktable.num_openmp_threads = 1; #ifdef _OPENMP darktable.num_openmp_threads = omp_get_num_procs(); #endif darktable.unmuted = 0; GSList *config_override = NULL; for(int k = 1; k < argc; k++) { if(argv[k][0] == '-') { if(!strcmp(argv[k], "--help")) { return usage(argv[0]); } if(!strcmp(argv[k], "-h")) { return usage(argv[0]); } else if(!strcmp(argv[k], 
"--version")) { #ifdef USE_LUA const char *lua_api_version = strcmp(LUA_API_VERSION_SUFFIX, "") ? STR(LUA_API_VERSION_MAJOR) "." STR(LUA_API_VERSION_MINOR) "." STR(LUA_API_VERSION_PATCH) "-" LUA_API_VERSION_SUFFIX : STR(LUA_API_VERSION_MAJOR) "." STR(LUA_API_VERSION_MINOR) "." STR(LUA_API_VERSION_PATCH); #endif printf("this is %s\ncopyright (c) 2009-%s johannes hanika\n" PACKAGE_BUGREPORT "\n\ncompile options:\n" " bit depth is %s\n" #ifdef _DEBUG " debug build\n" #else " normal build\n" #endif #if defined(__SSE2__) && defined(__SSE__) " SSE2 optimized codepath enabled\n" #else " SSE2 optimized codepath disabled\n" #endif #ifdef _OPENMP " OpenMP support enabled\n" #else " OpenMP support disabled\n" #endif #ifdef HAVE_OPENCL " OpenCL support enabled\n" #else " OpenCL support disabled\n" #endif #ifdef USE_LUA " Lua support enabled, API version %s\n" #else " Lua support disabled\n" #endif #ifdef USE_COLORDGTK " Colord support enabled\n" #else " Colord support disabled\n" #endif #ifdef HAVE_GPHOTO2 " gPhoto2 support enabled\n" #else " gPhoto2 support disabled\n" #endif #ifdef HAVE_GRAPHICSMAGICK " GraphicsMagick support enabled\n" #else " GraphicsMagick support disabled\n" #endif #ifdef HAVE_OPENEXR " OpenEXR support enabled\n" #else " OpenEXR support disabled\n" #endif , darktable_package_string, darktable_last_commit_year, (sizeof(void *) == 8 ? "64 bit" : sizeof(void *) == 4 ? 
"32 bit" : "unknown") #if USE_LUA , lua_api_version #endif ); return 1; } else if(!strcmp(argv[k], "--library") && argc > k + 1) { dbfilename_from_command = argv[++k]; argv[k-1] = NULL; argv[k] = NULL; } else if(!strcmp(argv[k], "--datadir") && argc > k + 1) { datadir_from_command = argv[++k]; argv[k-1] = NULL; argv[k] = NULL; } else if(!strcmp(argv[k], "--moduledir") && argc > k + 1) { moduledir_from_command = argv[++k]; argv[k-1] = NULL; argv[k] = NULL; } else if(!strcmp(argv[k], "--tmpdir") && argc > k + 1) { tmpdir_from_command = argv[++k]; argv[k-1] = NULL; argv[k] = NULL; } else if(!strcmp(argv[k], "--configdir") && argc > k + 1) { configdir_from_command = argv[++k]; argv[k-1] = NULL; argv[k] = NULL; } else if(!strcmp(argv[k], "--cachedir") && argc > k + 1) { cachedir_from_command = argv[++k]; argv[k-1] = NULL; argv[k] = NULL; } else if(!strcmp(argv[k], "--localedir") && argc > k + 1) { bindtextdomain(GETTEXT_PACKAGE, argv[++k]); argv[k-1] = NULL; argv[k] = NULL; } else if(argv[k][1] == 'd' && argc > k + 1) { if(!strcmp(argv[k + 1], "all")) darktable.unmuted = 0xffffffff; // enable all debug information else if(!strcmp(argv[k + 1], "cache")) darktable.unmuted |= DT_DEBUG_CACHE; // enable debugging for lib/film/cache module else if(!strcmp(argv[k + 1], "control")) darktable.unmuted |= DT_DEBUG_CONTROL; // enable debugging for scheduler module else if(!strcmp(argv[k + 1], "dev")) darktable.unmuted |= DT_DEBUG_DEV; // develop module else if(!strcmp(argv[k + 1], "input")) darktable.unmuted |= DT_DEBUG_INPUT; // input devices else if(!strcmp(argv[k + 1], "camctl")) darktable.unmuted |= DT_DEBUG_CAMCTL; // camera control module else if(!strcmp(argv[k + 1], "perf")) darktable.unmuted |= DT_DEBUG_PERF; // performance measurements else if(!strcmp(argv[k + 1], "pwstorage")) darktable.unmuted |= DT_DEBUG_PWSTORAGE; // pwstorage module else if(!strcmp(argv[k + 1], "opencl")) darktable.unmuted |= DT_DEBUG_OPENCL; // gpu accel via opencl else if(!strcmp(argv[k + 1], 
"sql")) darktable.unmuted |= DT_DEBUG_SQL; // SQLite3 queries else if(!strcmp(argv[k + 1], "memory")) darktable.unmuted |= DT_DEBUG_MEMORY; // some stats on mem usage now and then. else if(!strcmp(argv[k + 1], "lighttable")) darktable.unmuted |= DT_DEBUG_LIGHTTABLE; // lighttable related stuff. else if(!strcmp(argv[k + 1], "nan")) darktable.unmuted |= DT_DEBUG_NAN; // check for NANs when processing the pipe. else if(!strcmp(argv[k + 1], "masks")) darktable.unmuted |= DT_DEBUG_MASKS; // masks related stuff. else if(!strcmp(argv[k + 1], "lua")) darktable.unmuted |= DT_DEBUG_LUA; // lua errors are reported on console else if(!strcmp(argv[k + 1], "print")) darktable.unmuted |= DT_DEBUG_PRINT; // print errors are reported on console else if(!strcmp(argv[k + 1], "camsupport")) darktable.unmuted |= DT_DEBUG_CAMERA_SUPPORT; // camera support warnings are reported on console else return usage(argv[0]); k++; argv[k-1] = NULL; argv[k] = NULL; } else if(argv[k][1] == 't' && argc > k + 1) { darktable.num_openmp_threads = CLAMP(atol(argv[k + 1]), 1, 100); printf("[dt_init] using %d threads for openmp parallel sections\n", darktable.num_openmp_threads); k++; argv[k-1] = NULL; argv[k] = NULL; } else if(!strcmp(argv[k], "--conf") && argc > k + 1) { gchar *keyval = g_strdup(argv[++k]), *c = keyval; argv[k-1] = NULL; argv[k] = NULL; gchar *end = keyval + strlen(keyval); while(*c != '=' && c < end) c++; if(*c == '=' && *(c + 1) != '\0') { *c++ = '\0'; dt_conf_string_entry_t *entry = (dt_conf_string_entry_t *)g_malloc(sizeof(dt_conf_string_entry_t)); entry->key = g_strdup(keyval); entry->value = g_strdup(c); config_override = g_slist_append(config_override, entry); } g_free(keyval); } else if(!strcmp(argv[k], "--noiseprofiles") && argc > k + 1) { noiseprofiles_from_command = argv[++k]; argv[k-1] = NULL; argv[k] = NULL; } else if(!strcmp(argv[k], "--luacmd") && argc > k + 1) { #ifdef USE_LUA lua_command = argv[++k]; #else ++k; #endif argv[k-1] = NULL; argv[k] = NULL; } else 
if(!strcmp(argv[k], "--disable-opencl")) { #ifdef HAVE_OPENCL exclude_opencl = TRUE; #endif argv[k] = NULL; } else if(!strcmp(argv[k], "--")) { // "--" confuses the argument parser of glib/gtk. remove it. argv[k] = NULL; break; } else return usage(argv[0]); // fail on unrecognized options } } // remove the NULLs to not confuse gtk_init() later. for(int i = 1; i < argc; i++) { int k; for(k = i; k < argc; k++) if(argv[k] != NULL) break; if(k > i) { k -= i; for(int j = i + k; j < argc; j++) { argv[j-k] = argv[j]; argv[j] = NULL; } argc -= k; } } if(darktable.unmuted & DT_DEBUG_MEMORY) { fprintf(stderr, "[memory] at startup\n"); dt_print_mem_usage(); } if(init_gui) { // I doubt that connecting to dbus for darktable-cli makes sense darktable.dbus = dt_dbus_init(); // make sure that we have no stale global progress bar visible. thus it's run as early is possible dt_control_progress_init(darktable.control); } #ifdef _OPENMP omp_set_num_threads(darktable.num_openmp_threads); #endif dt_loc_init_datadir(datadir_from_command); dt_loc_init_plugindir(moduledir_from_command); if(dt_loc_init_tmp_dir(tmpdir_from_command)) { fprintf(stderr, "error: invalid temporary directory: %s\n", darktable.tmpdir); return usage(argv[0]); } dt_loc_init_user_config_dir(configdir_from_command); dt_loc_init_user_cache_dir(cachedir_from_command); #ifdef USE_LUA dt_lua_init_early(L); #endif // thread-safe init: dt_exif_init(); char datadir[PATH_MAX] = { 0 }; dt_loc_get_user_config_dir(datadir, sizeof(datadir)); char darktablerc[PATH_MAX] = { 0 }; snprintf(darktablerc, sizeof(darktablerc), "%s/darktablerc", datadir); // initialize the config backend. this needs to be done first... 
darktable.conf = (dt_conf_t *)calloc(1, sizeof(dt_conf_t)); dt_conf_init(darktable.conf, darktablerc, config_override); g_slist_free_full(config_override, g_free); // set the interface language const gchar *lang = dt_conf_get_string("ui_last/gui_language"); #if defined(_WIN32) // get the default locale if no language preference was specified in the config file if(lang == NULL || lang[0] == '\0') { const wchar_t *wcLocaleName = NULL; wcLocaleName = dtwin_get_locale(); if(wcLocaleName != NULL) { gchar *langLocale; langLocale = g_utf16_to_utf8(wcLocaleName, -1, NULL, NULL, NULL); if(langLocale != NULL) { g_free((gchar *)lang); lang = g_strdup(langLocale); } } } #endif // defined (_WIN32) if(lang != NULL && lang[0] != '\0') { g_setenv("LANGUAGE", lang, 1); if(setlocale(LC_ALL, lang) != NULL) gtk_disable_setlocale(); setlocale(LC_MESSAGES, lang); g_setenv("LANG", lang, 1); } g_free((gchar *)lang); // we need this REALLY early so that error messages can be shown, however after gtk_disable_setlocale if(init_gui) { #ifdef GDK_WINDOWING_WAYLAND // There are currently bad interactions with Wayland (drop-downs // are very narrow, scroll events lost). Until this is fixed, give // priority to the XWayland backend for Wayland users. gdk_set_allowed_backends("x11,*"); #endif gtk_init(&argc, &argv); } // detect cpu features and decide which codepaths to enable dt_codepaths_init(); // get the list of color profiles darktable.color_profiles = dt_colorspaces_init(); // initialize the database darktable.db = dt_database_init(dbfilename_from_command, load_data); if(darktable.db == NULL) { printf("ERROR : cannot open database\n"); return 1; } else if(!dt_database_get_lock_acquired(darktable.db)) { gboolean image_loaded_elsewhere = FALSE; #ifndef MAC_INTEGRATION // send the images to the other instance via dbus fprintf(stderr, "trying to open the images in the running instance\n"); GDBusConnection *connection = NULL; for(int i = 1; i < argc; i++) { // make the filename absolute ... 
if(argv[i] == NULL || *argv[i] == '\0') continue; gchar *filename = dt_util_normalize_path(argv[i]); if(filename == NULL) continue; if(!connection) connection = g_bus_get_sync(G_BUS_TYPE_SESSION, NULL, NULL); // ... and send it to the running instance of darktable image_loaded_elsewhere = g_dbus_connection_call_sync(connection, "org.darktable.service", "/darktable", "org.darktable.service.Remote", "Open", g_variant_new("(s)", filename), NULL, G_DBUS_CALL_FLAGS_NONE, -1, NULL, NULL) != NULL; g_free(filename); } if(connection) g_object_unref(connection); #endif if(!image_loaded_elsewhere) dt_database_show_error(darktable.db); return 1; } // Initialize the signal system darktable.signals = dt_control_signal_init(); // Make sure that the database and xmp files are in sync // We need conf and db to be up and running for that which is the case here. // FIXME: is this also useful in non-gui mode? GList *changed_xmp_files = NULL; if(init_gui && dt_conf_get_bool("run_crawler_on_start")) { changed_xmp_files = dt_control_crawler_run(); } if(init_gui) { dt_control_init(darktable.control); } else { if(dbfilename_from_command && !strcmp(dbfilename_from_command, ":memory:")) dt_gui_presets_init(); // init preset db schema. 
darktable.control->running = 0; darktable.control->accelerators = NULL; dt_pthread_mutex_init(&darktable.control->run_mutex, NULL); } // initialize collection query darktable.collection = dt_collection_new(NULL); /* initialize selection */ darktable.selection = dt_selection_new(); /* capabilities set to NULL */ darktable.capabilities = NULL; // Initialize the password storage engine darktable.pwstorage = dt_pwstorage_new(); darktable.guides = dt_guides_init(); #ifdef HAVE_GRAPHICSMAGICK /* GraphicsMagick init */ InitializeMagick(darktable.progname); // *SIGH* dt_set_signal_handlers(); #endif darktable.opencl = (dt_opencl_t *)calloc(1, sizeof(dt_opencl_t)); #ifdef HAVE_OPENCL dt_opencl_init(darktable.opencl, exclude_opencl, print_statistics); #endif darktable.points = (dt_points_t *)calloc(1, sizeof(dt_points_t)); dt_points_init(darktable.points, dt_get_num_threads()); darktable.noiseprofile_parser = dt_noiseprofile_init(noiseprofiles_from_command); // must come before mipmap_cache, because that one will need to access // image dimensions stored in here: darktable.image_cache = (dt_image_cache_t *)calloc(1, sizeof(dt_image_cache_t)); dt_image_cache_init(darktable.image_cache); darktable.mipmap_cache = (dt_mipmap_cache_t *)calloc(1, sizeof(dt_mipmap_cache_t)); dt_mipmap_cache_init(darktable.mipmap_cache); // The GUI must be initialized before the views, because the init() // functions of the views depend on darktable.control->accels_* to register // their keyboard accelerators if(init_gui) { darktable.gui = (dt_gui_gtk_t *)calloc(1, sizeof(dt_gui_gtk_t)); if(dt_gui_gtk_init(darktable.gui)) return 1; dt_bauhaus_init(); } else darktable.gui = NULL; darktable.view_manager = (dt_view_manager_t *)calloc(1, sizeof(dt_view_manager_t)); dt_view_manager_init(darktable.view_manager); // check whether we were able to load darkroom view. if we failed, we'll crash everywhere later on. 
if(!darktable.develop) return 1; darktable.imageio = (dt_imageio_t *)calloc(1, sizeof(dt_imageio_t)); dt_imageio_init(darktable.imageio); // load the darkroom mode plugins once: dt_iop_load_modules_so(); if(init_gui) { #ifdef HAVE_GPHOTO2 // Initialize the camera control. // this is done late so that the gui can react to the signal sent but before switching to lighttable! darktable.camctl = dt_camctl_new(); #endif darktable.lib = (dt_lib_t *)calloc(1, sizeof(dt_lib_t)); dt_lib_init(darktable.lib); dt_gui_gtk_load_config(); // init the gui part of views dt_view_manager_gui_init(darktable.view_manager); // Loading the keybindings char keyfile[PATH_MAX] = { 0 }; // First dump the default keymapping snprintf(keyfile, sizeof(keyfile), "%s/keyboardrc_default", datadir); gtk_accel_map_save(keyfile); // Removing extraneous semi-colons from the default keymap strip_semicolons_from_keymap(keyfile); // Then load any modified keys if available snprintf(keyfile, sizeof(keyfile), "%s/keyboardrc", datadir); if(g_file_test(keyfile, G_FILE_TEST_EXISTS)) gtk_accel_map_load(keyfile); else gtk_accel_map_save(keyfile); // Save the default keymap if none is present // initialize undo struct darktable.undo = dt_undo_init(); } if(darktable.unmuted & DT_DEBUG_MEMORY) { fprintf(stderr, "[memory] after successful startup\n"); dt_print_mem_usage(); } dt_image_local_copy_synch(); /* init lua last, since it's user made stuff it must be in the real environment */ #ifdef USE_LUA dt_lua_init(darktable.lua_state.state, lua_command); #endif if(init_gui) { const char *mode = "lighttable"; // april 1st: you have to earn using dt first! or know that you can switch views with keyboard shortcuts time_t now; time(&now); struct tm lt; localtime_r(&now, <); if(lt.tm_mon == 3 && lt.tm_mday == 1) mode = "knight"; // we have to call dt_ctl_switch_mode_to() here already to not run into a lua deadlock. 
// having another call later is ok dt_ctl_switch_mode_to(mode); #ifndef MAC_INTEGRATION // load image(s) specified on cmdline. // this has to happen after lua is initialized as image import can run lua code // If only one image is listed, attempt to load it in darkroom int last_id = 0; gboolean only_single_images = TRUE; int loaded_images = 0; for(int i = 1; i < argc; i++) { gboolean single_image = FALSE; if(argv[i] == NULL || *argv[i] == '\0') continue; int new_id = dt_load_from_string(argv[i], FALSE, &single_image); if(new_id > 0) { last_id = new_id; loaded_images++; if(!single_image) only_single_images = FALSE; } } if(loaded_images == 1 && only_single_images) { dt_control_set_mouse_over_id(last_id); dt_ctl_switch_mode_to("darkroom"); } #endif } // last but not least construct the popup that asks the user about images whose xmp files are newer than the // db entry if(init_gui && changed_xmp_files) { dt_control_crawler_show_image_list(changed_xmp_files); } dt_print(DT_DEBUG_CONTROL, "[init] startup took %f seconds\n", dt_get_wtime() - start_wtime); return 0; }
/*
 * Test driver: assemble the 5-point finite difference matrix on an m x n
 * grid (4 on the diagonal, -1 for each existing neighbour) in CSR format,
 * convert it to the requested storage format, build the right-hand side
 * b = A*u with u = (1,...,1), solve A x = b and report the solver statistics.
 *
 * Usage: <prog> m n matrix_type solution_filename rhistory_filename [options]
 *
 *   argv[1], argv[2]  grid dimensions m and n (both must be > 0)
 *   argv[3]           matrix storage type passed to lis_matrix_set_type()
 *   argv[4]           file the solution vector is written to
 *   argv[5]           file the residual history is written to
 *
 * Returns 0 on success; failures are handled through CHKERR().
 */
LIS_INT main(LIS_INT argc, char* argv[])
{
	LIS_MATRIX A0,A;
	LIS_VECTOR x,b,u;
	LIS_SOLVER solver;
	LIS_INT m,n,nn,nnz;
	LIS_INT i,j,ii,jj,ctr;
	LIS_INT is,ie;
	LIS_INT nprocs,my_rank;
	int int_nprocs,int_my_rank;
	LIS_INT nsol;
	LIS_INT err,iter,mtype,iter_double,iter_quad;
	double time,itime,ptime,p_c_time,p_i_time;
	LIS_REAL resid;
	char solvername[128];
	LIS_INT *ptr,*index;
	LIS_SCALAR *value;

	LIS_DEBUG_FUNC_IN;

	lis_initialize(&argc, &argv);

#ifdef USE_MPI
	MPI_Comm_size(MPI_COMM_WORLD,&int_nprocs);
	MPI_Comm_rank(MPI_COMM_WORLD,&int_my_rank);
	nprocs = int_nprocs;
	my_rank = int_my_rank;
#else
	nprocs = 1;
	my_rank = 0;
#endif

	if( argc < 6 )
	{
		if( my_rank==0 )
		{
			printf("Usage: %s m n matrix_type solution_filename rhistory_filename [options]\n", argv[0]);
		}
		CHKERR(1);
	}

	m = atoi(argv[1]);
	n = atoi(argv[2]);
	mtype = atoi(argv[3]);
	if( m<=0 || n<=0 )
	{
#ifdef _LONGLONG
		if( my_rank==0 ) printf("m=%lld <=0 or n=%lld <=0\n",m,n);
#else
		if( my_rank==0 ) printf("m=%d <=0 or n=%d <=0\n",m,n);
#endif
		CHKERR(1);
	}

	if( my_rank==0 )
	{
		printf("\n");
#ifdef _LONGLONG
		printf("number of processes = %lld\n",nprocs);
#else
		printf("number of processes = %d\n",nprocs);
#endif
	}

#ifdef _OPENMP
	if( my_rank==0 )
	{
		/* The OpenMP query routines return a plain int regardless of
		   _LONGLONG, so they are always printed with %d. */
		printf("max number of threads = %d\n",omp_get_num_procs());
		printf("number of threads = %d\n",omp_get_max_threads());
	}
#endif

	/* create matrix and vectors */
	nn = m*n;
	err = lis_matrix_create(LIS_COMM_WORLD,&A);
	CHKERR(err);
	err = lis_matrix_set_size(A,0,nn);
	CHKERR(err);

	/* at most 5 nonzeros per row: the diagonal and up to 4 neighbours */
	ptr = (LIS_INT *)malloc((A->n+1)*sizeof(LIS_INT));
	if( ptr==NULL ) CHKERR(1);
	index = (LIS_INT *)malloc(5*A->n*sizeof(LIS_INT));
	if( index==NULL ) CHKERR(1);
	value = (LIS_SCALAR *)malloc(5*A->n*sizeof(LIS_SCALAR));
	if( value==NULL ) CHKERR(1);

	/* fill the rows [is,ie) reported by lis_matrix_get_range() */
	lis_matrix_get_range(A,&is,&ie);
	ctr = 0;
	for(ii=is;ii<ie;ii++)
	{
		/* global row ii corresponds to grid point (i,j) with ii = i*m + j */
		i = ii/m;
		j = ii - i*m;
		if( i>0 )   { jj = ii - m; index[ctr] = jj; value[ctr++] = -1.0;}
		if( i<n-1 ) { jj = ii + m; index[ctr] = jj; value[ctr++] = -1.0;}
		if( j>0 )   { jj = ii - 1; index[ctr] = jj; value[ctr++] = -1.0;}
		if( j<m-1 ) { jj = ii + 1; index[ctr] = jj; value[ctr++] = -1.0;}
		index[ctr] = ii; value[ctr++] = 4.0;
		ptr[ii-is+1] = ctr;
	}
	ptr[0] = 0;
	err = lis_matrix_set_csr(ptr[ie-is],ptr,index,value,A);
	CHKERR(err);
	err = lis_matrix_assemble(A);
	CHKERR(err);

	nnz = A->nnz;
#ifdef USE_MPI
	/* i is reused as a scratch variable for the global sum */
	MPI_Allreduce(&nnz,&i,1,LIS_MPI_INT,MPI_SUM,A->comm);
	nnz = i;
#endif

#ifdef _LONGLONG
	if( my_rank==0 ) printf("matrix size = %lld x %lld (%lld nonzero entries)\n\n",nn,nn,nnz);
#else
	if( my_rank==0 ) printf("matrix size = %d x %d (%d nonzero entries)\n\n",nn,nn,nnz);
#endif

	/* convert the CSR matrix to the requested storage format */
	err = lis_matrix_duplicate(A,&A0);
	CHKERR(err);
	lis_matrix_set_type(A0,mtype);
	err = lis_matrix_convert(A,A0);
	CHKERR(err);
	lis_matrix_destroy(A);
	A = A0;

	err = lis_vector_duplicate(A,&u);
	CHKERR(err);
	err = lis_vector_duplicate(A,&b);
	CHKERR(err);
	err = lis_vector_duplicate(A,&x);
	CHKERR(err);

	/* right-hand side b = A*u with u = (1,...,1) */
	err = lis_vector_set_all(1.0,u);
	CHKERR(err);
	lis_matvec(A,u,b);

	err = lis_solver_create(&solver);
	CHKERR(err);
	lis_solver_set_option("-print mem",solver);
	lis_solver_set_optionC(solver);

	err = lis_solve(A,b,x,solver);
	CHKERR(err);

	lis_solver_get_iterex(solver,&iter,&iter_double,&iter_quad);
	lis_solver_get_timeex(solver,&time,&itime,&ptime,&p_c_time,&p_i_time);
	lis_solver_get_residualnorm(solver,&resid);
	lis_solver_get_solver(solver,&nsol);
	lis_solver_get_solvername(nsol,solvername);

	if( my_rank==0 )
	{
#ifdef _LONGLONG
#ifdef _LONG__DOUBLE
		printf("%s: number of iterations = %lld \n",solvername, iter);
#else
		printf("%s: number of iterations = %lld (double = %lld, quad = %lld)\n",solvername,iter, iter_double, iter_quad);
#endif
#else
#ifdef _LONG__DOUBLE
		printf("%s: number of iterations = %d \n",solvername, iter);
#else
		printf("%s: number of iterations = %d (double = %d, quad = %d)\n",solvername,iter, iter_double, iter_quad);
#endif
#endif
		printf("%s: elapsed time = %e sec.\n",solvername,time);
		printf("%s: preconditioner = %e sec.\n",solvername, ptime);
		printf("%s: matrix creation = %e sec.\n",solvername, p_c_time);
		printf("%s: linear solver = %e sec.\n",solvername, itime);
#ifdef _LONG__DOUBLE
		printf("%s: relative residual = %Le\n\n",solvername,resid);
#else
		printf("%s: relative residual = %e\n\n",solvername,resid);
#endif
	}

	/* write solution */
	lis_output_vector(x,LIS_FMT_MM,argv[4]);

	/* write residual history */
	lis_solver_output_rhistory(solver, argv[5]);

	lis_solver_destroy(solver);
	lis_matrix_destroy(A);
	lis_vector_destroy(b);
	lis_vector_destroy(x);
	lis_vector_destroy(u);

	lis_finalize();

	LIS_DEBUG_FUNC_OUT;

	return 0;
}
void FT_ProExpn_VNA() { int numprocs,myid,ID,tag=999; int count,NumSpe; int L,i,kj; int Lspe,spe,GL,Mul; int RestartRead_Succeed; double Sr,Dr; double norm_k,h,dum0; double rmin,rmax,r,sum; double kmin,kmax,Sk,Dk; double RGL[GL_Mesh + 2]; double *SumTmp; double tmp0,tmp1; double **SphB; double *tmp_SphB,*tmp_SphBp; double TStime, TEtime; /* for MPI */ MPI_Status stat; MPI_Request request; /* for OpenMP */ int OMPID,Nthrds,Nprocs; char fileFT[YOUSO10]; char operate[300]; FILE *fp; size_t size; dtime(&TStime); /* MPI */ MPI_Comm_size(mpi_comm_level1,&numprocs); MPI_Comm_rank(mpi_comm_level1,&myid); if (myid==Host_ID && 0<level_stdout) printf("<FT_ProExpn_VNA> Fourier transform of VNA separable projectors\n"); RestartRead_Succeed = 0; /*********************************************************** In case of Scf_RestartFromFile==1, read Spe_VNA_Bessel ***********************************************************/ if (Scf_RestartFromFile){ /**************************************************** regenerate radial grids in the k-space for the MPI calculation ****************************************************/ for (kj=0; kj<GL_Mesh; kj++){ kmin = Radial_kmin; kmax = PAO_Nkmax; Sk = kmax + kmin; Dk = kmax - kmin; norm_k = 0.50*(Dk*GL_Abscissae[kj] + Sk); GL_NormK[kj] = norm_k; } /*********************************************************** read Spe_VNA_Bessel ***********************************************************/ sprintf(fileFT,"%s%s_rst/%s.ftPEvna",filepath,filename,filename); if ((fp = fopen(fileFT,"rb")) != NULL){ RestartRead_Succeed = 1; for (spe=0; spe<SpeciesNum; spe++){ for (L=0; L<=List_YOUSO[35]; L++){ for (Mul=0; Mul<List_YOUSO[34]; Mul++){ size = fread(&Spe_VNA_Bessel[spe][L][Mul][0],sizeof(double),GL_Mesh,fp); if (size!=GL_Mesh) RestartRead_Succeed = 0; } } } fclose(fp); } else{ printf("Could not open a file %s in FT_ProExpn_VNA\n",fileFT); } } /*********************************************************** if (RestartRead_Succeed==0), calculate Spe_VNA_Bessel 
***********************************************************/ if (RestartRead_Succeed==0){ for (Lspe=0; Lspe<MSpeciesNum; Lspe++){ spe = Species_Top[myid] + Lspe; /* initalize */ /* tabulation on Gauss-Legendre radial grid */ rmin = Spe_VPS_RV[spe][0]; rmax = Spe_Atom_Cut1[spe] + 0.5; Sr = rmax + rmin; Dr = rmax - rmin; for (i=0; i<GL_Mesh; i++){ RGL[i] = 0.50*(Dr*GL_Abscissae[i] + Sr); } kmin = Radial_kmin; kmax = PAO_Nkmax; Sk = kmax + kmin; Dk = kmax - kmin; /* loop for kj */ #pragma omp parallel shared(List_YOUSO,GL_Weight,GL_Abscissae,Dr,Dk,Sk,RGL,Projector_VNA,Spe_VPS_RV,Spe_Num_Mesh_VPS,Spe_VNA_Bessel) private(SumTmp,SphB,tmp_SphB,tmp_SphBp,OMPID,Nthrds,Nprocs,kj,norm_k,i,r,L,Mul,tmp0,dum0) { /* allocate arrays */ SumTmp = (double*)malloc(sizeof(double)*List_YOUSO[34]); SphB = (double**)malloc(sizeof(double*)*(List_YOUSO[35]+3)); for(L=0; L<(List_YOUSO[35]+3); L++){ SphB[L] = (double*)malloc(sizeof(double)*GL_Mesh); } tmp_SphB = (double*)malloc(sizeof(double)*(List_YOUSO[35]+3)); tmp_SphBp = (double*)malloc(sizeof(double)*(List_YOUSO[35]+3)); /* get info. 
on OpenMP */ OMPID = omp_get_thread_num(); Nthrds = omp_get_num_threads(); Nprocs = omp_get_num_procs(); for ( kj=OMPID; kj<GL_Mesh; kj+=Nthrds ){ norm_k = 0.50*(Dk*GL_Abscissae[kj] + Sk); /* calculate SphB */ for (i=0; i<GL_Mesh; i++){ r = RGL[i]; Spherical_Bessel(norm_k*r,List_YOUSO[35],tmp_SphB,tmp_SphBp); for(L=0; L<=List_YOUSO[35]; L++){ SphB[L][i] = tmp_SphB[L]; } } /* loop for L */ for (L=0; L<=List_YOUSO[35]; L++){ /**************************************************** \int jL(k*r)RL r^2 dr ****************************************************/ for (Mul=0; Mul<List_YOUSO[34]; Mul++) SumTmp[Mul] = 0.0; /* Gauss-Legendre quadrature */ for (i=0; i<GL_Mesh; i++){ r = RGL[i]; tmp0 = r*r*GL_Weight[i]*SphB[L][i]; for (Mul=0; Mul<List_YOUSO[34]; Mul++){ dum0 = PhiF(r, Projector_VNA[spe][L][Mul], Spe_VPS_RV[spe], Spe_Num_Mesh_VPS[spe]); SumTmp[Mul] += dum0*tmp0; } } for (Mul=0; Mul<List_YOUSO[34]; Mul++){ Spe_VNA_Bessel[spe][L][Mul][kj] = 0.5*Dr*SumTmp[Mul]; } } /* L */ } /* kj */ /* free arrays */ free(SumTmp); for(L=0; L<(List_YOUSO[35]+3); L++){ free(SphB[L]); } free(SphB); free(tmp_SphB); free(tmp_SphBp); #pragma omp flush(Spe_VNA_Bessel) } /* #pragma omp parallel */ } /* Lspe */ /**************************************************** regenerate radial grids in the k-space for the MPI calculation ****************************************************/ for (kj=0; kj<GL_Mesh; kj++){ kmin = Radial_kmin; kmax = PAO_Nkmax; Sk = kmax + kmin; Dk = kmax - kmin; norm_k = 0.50*(Dk*GL_Abscissae[kj] + Sk); GL_NormK[kj] = norm_k; } /*********************************************************** sending and receiving of Spe_VNA_Bessel by MPI ***********************************************************/ for (ID=0; ID<Num_Procs2; ID++){ NumSpe = Species_End[ID] - Species_Top[ID] + 1; for (Lspe=0; Lspe<NumSpe; Lspe++){ spe = Species_Top[ID] + Lspe; for (L=0; L<=List_YOUSO[35]; L++){ for (Mul=0; Mul<List_YOUSO[34]; Mul++){ MPI_Bcast(&Spe_VNA_Bessel[spe][L][Mul][0], 
GL_Mesh,MPI_DOUBLE,ID,mpi_comm_level1); } } } } /*********************************************************** save Spe_VNA_Bessel ***********************************************************/ if (myid==Host_ID){ sprintf(fileFT,"%s%s_rst/%s.ftPEvna",filepath,filename,filename); if ((fp = fopen(fileFT,"wb")) != NULL){ for (spe=0; spe<SpeciesNum; spe++){ for (L=0; L<=List_YOUSO[35]; L++){ for (Mul=0; Mul<List_YOUSO[34]; Mul++){ fwrite(&Spe_VNA_Bessel[spe][L][Mul][0],sizeof(double),GL_Mesh,fp); } } } fclose(fp); } else{ printf("Could not open a file %s in FT_ProExpn_VNA\n",fileFT); } } } /* if (RestartRead_Succeed==0) */ /*********************************************************** elapsed time ***********************************************************/ dtime(&TEtime); /* printf("myid=%2d Elapsed Time (s) = %15.12f\n",myid,TEtime-TStime); MPI_Finalize(); exit(0); */ }
int dt_init(int argc, char *argv[], const int init_gui) { // make everything go a lot faster. _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); #ifndef __APPLE__ _dt_sigsegv_old_handler = signal(SIGSEGV,&_dt_sigsegv_handler); #endif #ifndef __SSE2__ fprintf(stderr, "[dt_init] unfortunately we depend on SSE2 instructions at this time.\n"); fprintf(stderr, "[dt_init] please contribute a backport patch (or buy a newer processor).\n"); return 1; #endif #ifdef M_MMAP_THRESHOLD mallopt(M_MMAP_THRESHOLD,128*1024) ; /* use mmap() for large allocations */ #endif bindtextdomain (GETTEXT_PACKAGE, DARKTABLE_LOCALEDIR); bind_textdomain_codeset (GETTEXT_PACKAGE, "UTF-8"); textdomain (GETTEXT_PACKAGE); // init all pointers to 0: memset(&darktable, 0, sizeof(darktable_t)); darktable.progname = argv[0]; // database gchar *dbfilename_from_command = NULL; char *datadir_from_command = NULL; char *moduledir_from_command = NULL; char *tmpdir_from_command = NULL; char *configdir_from_command = NULL; char *cachedir_from_command = NULL; darktable.num_openmp_threads = 1; #ifdef _OPENMP darktable.num_openmp_threads = omp_get_num_procs(); #endif darktable.unmuted = 0; GSList *images_to_load = NULL; for(int k=1; k<argc; k++) { if(argv[k][0] == '-') { if(!strcmp(argv[k], "--help")) { return usage(argv[0]); } if(!strcmp(argv[k], "-h")) { return usage(argv[0]); } else if(!strcmp(argv[k], "--version")) { printf("this is "PACKAGE_STRING"\ncopyright (c) 2009-2013 johannes hanika\n"PACKAGE_BUGREPORT"\n"); return 1; } else if(!strcmp(argv[k], "--library")) { dbfilename_from_command = argv[++k]; } else if(!strcmp(argv[k], "--datadir")) { datadir_from_command = argv[++k]; } else if(!strcmp(argv[k], "--moduledir")) { moduledir_from_command = argv[++k]; } else if(!strcmp(argv[k], "--tmpdir")) { tmpdir_from_command = argv[++k]; } else if(!strcmp(argv[k], "--configdir")) { configdir_from_command = argv[++k]; } else if(!strcmp(argv[k], "--cachedir")) { cachedir_from_command = argv[++k]; } else if(!strcmp(argv[k], 
"--localedir")) { bindtextdomain (GETTEXT_PACKAGE, argv[++k]); } else if(argv[k][1] == 'd' && argc > k+1) { if(!strcmp(argv[k+1], "all")) darktable.unmuted = 0xffffffff; // enable all debug information else if(!strcmp(argv[k+1], "cache")) darktable.unmuted |= DT_DEBUG_CACHE; // enable debugging for lib/film/cache module else if(!strcmp(argv[k+1], "control")) darktable.unmuted |= DT_DEBUG_CONTROL; // enable debugging for scheduler module else if(!strcmp(argv[k+1], "dev")) darktable.unmuted |= DT_DEBUG_DEV; // develop module else if(!strcmp(argv[k+1], "fswatch")) darktable.unmuted |= DT_DEBUG_FSWATCH; // fswatch module else if(!strcmp(argv[k+1], "camctl")) darktable.unmuted |= DT_DEBUG_CAMCTL; // camera control module else if(!strcmp(argv[k+1], "perf")) darktable.unmuted |= DT_DEBUG_PERF; // performance measurements else if(!strcmp(argv[k+1], "pwstorage")) darktable.unmuted |= DT_DEBUG_PWSTORAGE; // pwstorage module else if(!strcmp(argv[k+1], "opencl")) darktable.unmuted |= DT_DEBUG_OPENCL; // gpu accel via opencl else if(!strcmp(argv[k+1], "sql")) darktable.unmuted |= DT_DEBUG_SQL; // SQLite3 queries else if(!strcmp(argv[k+1], "memory")) darktable.unmuted |= DT_DEBUG_MEMORY; // some stats on mem usage now and then. else if(!strcmp(argv[k+1], "lighttable")) darktable.unmuted |= DT_DEBUG_LIGHTTABLE; // lighttable related stuff. else if(!strcmp(argv[k+1], "nan")) darktable.unmuted |= DT_DEBUG_NAN; // check for NANs when processing the pipe. 
else return usage(argv[0]); k ++; } else if(argv[k][1] == 't' && argc > k+1) { darktable.num_openmp_threads = CLAMP(atol(argv[k+1]), 1, 100); printf("[dt_init] using %d threads for openmp parallel sections\n", darktable.num_openmp_threads); k ++; } } #ifndef MAC_INTEGRATION else { images_to_load = g_slist_append(images_to_load, argv[k]); } #endif } if(darktable.unmuted & DT_DEBUG_MEMORY) { fprintf(stderr, "[memory] at startup\n"); dt_print_mem_usage(); } #ifdef _OPENMP omp_set_num_threads(darktable.num_openmp_threads); #endif dt_loc_init_datadir(datadir_from_command); dt_loc_init_plugindir(moduledir_from_command); if(dt_loc_init_tmp_dir(tmpdir_from_command)) { printf(_("ERROR : invalid temporary directory : %s\n"),darktable.tmpdir); return usage(argv[0]); } dt_loc_init_user_config_dir(configdir_from_command); dt_loc_init_user_cache_dir(cachedir_from_command); #if !GLIB_CHECK_VERSION(2, 35, 0) g_type_init(); #endif // does not work, as gtk is not inited yet. // even if it were, it's a super bad idea to invoke gtk stuff from // a signal handler. /* check cput caps */ // dt_check_cpu(argc,argv); #ifdef HAVE_GEGL char geglpath[DT_MAX_PATH_LEN]; char datadir[DT_MAX_PATH_LEN]; dt_loc_get_datadir(datadir, DT_MAX_PATH_LEN); snprintf(geglpath, DT_MAX_PATH_LEN, "%s/gegl:/usr/lib/gegl-0.0", datadir); (void)setenv("GEGL_PATH", geglpath, 1); gegl_init(&argc, &argv); #endif // thread-safe init: dt_exif_init(); char datadir[DT_MAX_PATH_LEN]; dt_loc_get_user_config_dir (datadir,DT_MAX_PATH_LEN); char filename[DT_MAX_PATH_LEN]; snprintf(filename, DT_MAX_PATH_LEN, "%s/darktablerc", datadir); // intialize the config backend. this needs to be done first... 
darktable.conf = (dt_conf_t *)malloc(sizeof(dt_conf_t)); memset(darktable.conf, 0, sizeof(dt_conf_t)); dt_conf_init(darktable.conf, filename); // set the interface language const gchar* lang = dt_conf_get_string("ui_last/gui_language"); if(lang != NULL && lang[0] != '\0') { if(setlocale(LC_ALL, lang) != NULL) gtk_disable_setlocale(); } // initialize the database darktable.db = dt_database_init(dbfilename_from_command); if(darktable.db == NULL) { printf("ERROR : cannot open database\n"); return 1; } else if(dt_database_get_already_locked(darktable.db)) { // send the images to the other instance via dbus if(images_to_load) { GSList *p = images_to_load; // get a connection! GDBusConnection *connection = g_bus_get_sync(G_BUS_TYPE_SESSION,NULL, NULL); while (p != NULL) { // make the filename absolute ... gchar *filename = dt_make_path_absolute((gchar*)p->data); if(filename == NULL) continue; // ... and send it to the running instance of darktable g_dbus_connection_call_sync(connection, "org.darktable.service", "/darktable", "org.darktable.service.Remote", "Open", g_variant_new ("(s)", filename), NULL, G_DBUS_CALL_FLAGS_NONE, -1, NULL, NULL); p = g_slist_next(p); g_free(filename); } g_slist_free(images_to_load); g_object_unref(connection); } return 1; } // Initialize the signal system darktable.signals = dt_control_signal_init(); // Initialize the filesystem watcher darktable.fswatch=dt_fswatch_new(); #ifdef HAVE_GPHOTO2 // Initialize the camera control darktable.camctl=dt_camctl_new(); #endif // get max lighttable thumbnail size: darktable.thumbnail_width = CLAMPS(dt_conf_get_int("plugins/lighttable/thumbnail_width"), 200, 3000); darktable.thumbnail_height = CLAMPS(dt_conf_get_int("plugins/lighttable/thumbnail_height"), 200, 3000); // and make sure it can be mip-mapped all the way from mip4 to mip0 darktable.thumbnail_width /= 16; darktable.thumbnail_width *= 16; darktable.thumbnail_height /= 16; darktable.thumbnail_height *= 16; // Initialize the password storage 
engine darktable.pwstorage=dt_pwstorage_new(); // FIXME: move there into dt_database_t dt_pthread_mutex_init(&(darktable.db_insert), NULL); dt_pthread_mutex_init(&(darktable.plugin_threadsafe), NULL); dt_pthread_mutex_init(&(darktable.capabilities_threadsafe), NULL); darktable.control = (dt_control_t *)malloc(sizeof(dt_control_t)); memset(darktable.control, 0, sizeof(dt_control_t)); if(init_gui) { dt_control_init(darktable.control); } else { // this is in memory, so schema can't exist yet. if(dbfilename_from_command && !strcmp(dbfilename_from_command, ":memory:")) { dt_control_create_database_schema(); dt_gui_presets_init(); // also init preset db schema. } darktable.control->running = 0; darktable.control->accelerators = NULL; dt_pthread_mutex_init(&darktable.control->run_mutex, NULL); } // initialize collection query darktable.collection_listeners = NULL; darktable.collection = dt_collection_new(NULL); /* initialize sellection */ darktable.selection = dt_selection_new(); /* capabilities set to NULL */ darktable.capabilities = NULL; #ifdef HAVE_GRAPHICSMAGICK /* GraphicsMagick init */ InitializeMagick(darktable.progname); #endif darktable.opencl = (dt_opencl_t *)malloc(sizeof(dt_opencl_t)); memset(darktable.opencl, 0, sizeof(dt_opencl_t)); dt_opencl_init(darktable.opencl, argc, argv); darktable.blendop = (dt_blendop_t *)malloc(sizeof(dt_blendop_t)); memset(darktable.blendop, 0, sizeof(dt_blendop_t)); dt_develop_blend_init(darktable.blendop); darktable.points = (dt_points_t *)malloc(sizeof(dt_points_t)); memset(darktable.points, 0, sizeof(dt_points_t)); dt_points_init(darktable.points, dt_get_num_threads()); // must come before mipmap_cache, because that one will need to access // image dimensions stored in here: darktable.image_cache = (dt_image_cache_t *)malloc(sizeof(dt_image_cache_t)); memset(darktable.image_cache, 0, sizeof(dt_image_cache_t)); dt_image_cache_init(darktable.image_cache); darktable.mipmap_cache = (dt_mipmap_cache_t 
*)malloc(sizeof(dt_mipmap_cache_t)); memset(darktable.mipmap_cache, 0, sizeof(dt_mipmap_cache_t)); dt_mipmap_cache_init(darktable.mipmap_cache); // The GUI must be initialized before the views, because the init() // functions of the views depend on darktable.control->accels_* to register // their keyboard accelerators if(init_gui) { darktable.gui = (dt_gui_gtk_t *)malloc(sizeof(dt_gui_gtk_t)); memset(darktable.gui,0,sizeof(dt_gui_gtk_t)); if(dt_gui_gtk_init(darktable.gui, argc, argv)) return 1; dt_bauhaus_init(); } else darktable.gui = NULL; darktable.view_manager = (dt_view_manager_t *)malloc(sizeof(dt_view_manager_t)); memset(darktable.view_manager, 0, sizeof(dt_view_manager_t)); dt_view_manager_init(darktable.view_manager); // load the darkroom mode plugins once: dt_iop_load_modules_so(); if(init_gui) { darktable.lib = (dt_lib_t *)malloc(sizeof(dt_lib_t)); memset(darktable.lib, 0, sizeof(dt_lib_t)); dt_lib_init(darktable.lib); dt_control_load_config(darktable.control); g_strlcpy(darktable.control->global_settings.dbname, filename, 512); // overwrite if relocated. 
} darktable.imageio = (dt_imageio_t *)malloc(sizeof(dt_imageio_t)); memset(darktable.imageio, 0, sizeof(dt_imageio_t)); dt_imageio_init(darktable.imageio); if(init_gui) { // Loading the keybindings char keyfile[DT_MAX_PATH_LEN]; // First dump the default keymapping snprintf(keyfile, DT_MAX_PATH_LEN, "%s/keyboardrc_default", datadir); gtk_accel_map_save(keyfile); // Removing extraneous semi-colons from the default keymap strip_semicolons_from_keymap(keyfile); // Then load any modified keys if available snprintf(keyfile, DT_MAX_PATH_LEN, "%s/keyboardrc", datadir); if(g_file_test(keyfile, G_FILE_TEST_EXISTS)) gtk_accel_map_load(keyfile); else gtk_accel_map_save(keyfile); // Save the default keymap if none is present // I doubt that connecting to dbus for darktable-cli makes sense darktable.dbus = dt_dbus_init(); // initialize undo struct darktable.undo = dt_undo_init(); // load image(s) specified on cmdline int id = 0; if(images_to_load) { // If only one image is listed, attempt to load it in darkroom gboolean load_in_dr = (g_slist_next(images_to_load) == NULL); GSList *p = images_to_load; while (p != NULL) { // don't put these function calls into MAX(), the macro will evaluate // it twice (and happily deadlock, in this particular case) int newid = dt_load_from_string((gchar*)p->data, load_in_dr); id = MAX(id, newid); p = g_slist_next(p); } if (!load_in_dr || id == 0) dt_ctl_switch_mode_to(DT_LIBRARY); g_slist_free(images_to_load); } else dt_ctl_switch_mode_to(DT_LIBRARY); } /* start the indexer background job */ dt_control_start_indexer(); if(darktable.unmuted & DT_DEBUG_MEMORY) { fprintf(stderr, "[memory] after successful startup\n"); dt_print_mem_usage(); } return 0; }
int main ( int argc, char *argv[] )

/******************************************************************************/
/*
  Purpose:

    MAIN is the driver for SCHEDULE_OPENMP.

  Discussion:

    For N = 1, 2, 4, ..., 131072 the primes up to N are counted three
    times, once each by prime_default, prime_static and prime_dynamic,
    and the wall clock time of each call is reported side by side.

    The prime count printed in each row is the one returned by the last
    of the three calls (prime_dynamic).

  Licensing:

    This code is distributed under the GNU LGPL license.

  Author:

    John Burkardt
*/
{
  const int first_n = 1;
  const int last_n = 131072;
  const int growth = 2;
  int limit;
  int count;
  double t0;
  double elapsed_default;
  double elapsed_static;
  double elapsed_dynamic;

  printf ( "\n" );
  printf ( "SCHEDULE_OPENMP\n" );
  printf ( " C/OpenMP version\n" );
  printf ( " Count the primes from 1 to N.\n" );
  printf ( " This is an unbalanced work load, particular for two threads.\n" );
  printf ( " Demonstrate default, static and dynamic scheduling.\n" );
  printf ( "\n" );
  printf ( " Number of processors available = %d\n", omp_get_num_procs ( ) );
  printf ( " Number of threads = %d\n", omp_get_max_threads ( ) );

  printf ( "\n" );
  printf ( " Default Static Dynamic\n" );
  printf ( " N Pi(N) Time Time Time\n" );
  printf ( "\n" );

  for ( limit = first_n; limit <= last_n; limit = limit * growth )
  {
/*
  Time each scheduling variant on the same problem size.
*/
    t0 = omp_get_wtime ( );
    count = prime_default ( limit );
    elapsed_default = omp_get_wtime ( ) - t0;

    t0 = omp_get_wtime ( );
    count = prime_static ( limit );
    elapsed_static = omp_get_wtime ( ) - t0;

    t0 = omp_get_wtime ( );
    count = prime_dynamic ( limit );
    elapsed_dynamic = omp_get_wtime ( ) - t0;

    printf ( " %8d %8d %12f %12f %12f\n",
      limit, count, elapsed_default, elapsed_static, elapsed_dynamic );
  }
/*
  Terminate.
*/
  printf ( "\n" );
  printf ( "SCHEDULE_OPENMP\n" );
  printf ( " Normal end of execution.\n" );

  return 0;
}
int main(int argc, char* argv[]) { const char* program_name = "contact_profile"; bool optsOK = true; gmx::initForCommandLine(&argc,&argv); copyright(program_name); cout << " Computes the standard atomic contacts for structures in" << endl; cout << " the given xtc file. A topology PDB file and atom index file" << endl; cout << " should be provided for determining the atoms to compare." << endl; cout << " The resulting sparse contact distance profiles are" << endl; cout << " in sparse vector format (index-file and data-file)." << endl; cout << endl; cout << " Use -h or --help to see the complete list of options." << endl; cout << endl; // Option vars... int nthreads = 0; double sigma; double eps; string top_filename; string xtc_filename; string ndx_filename; const char* ndx_filename_ptr = NULL; string index_filename; string data_filename; // Declare the supported options. po::options_description cmdline_options; po::options_description program_options("Program options"); program_options.add_options() ("help,h", "show this help message and exit") ("threads,t", po::value<int>(&nthreads)->default_value(omp_get_max_threads()>omp_get_num_procs()?omp_get_num_procs():omp_get_max_threads()), "Input: Number of threads to start (int)") ("epsilon,e", po::value<double>(&eps)->default_value(9.0), "Input: Contact cutoff (real)") // ("sigma,q", po::value<double>(&sigma)->default_value(1), "Input: Standard deviation of gaussian kernel (real)") ("topology-file,p", po::value<string>(&top_filename)->default_value("topology.pdb"), "Input: Topology file [.pdb,.gro,.tpr] (string:filename)") ("xtc-file,x", po::value<string>(&xtc_filename)->default_value("traj.xtc"), "Input: Trajectory file (string:filename)") ("ndx-file,n", po::value<string>(&ndx_filename), "Input: K-nn distances file (string:filename)") ("index-file,i", po::value<string>(&index_filename)->default_value("reference.svi"), "Output: Sparse vector indices file (string:filename)") ("data-file,d", 
po::value<string>(&data_filename)->default_value("reference.svd"), "Output: Sparse vector data file (string:filename)") ; cmdline_options.add(program_options); po::variables_map vm; po::store(po::parse_command_line(argc, argv, cmdline_options), vm); po::notify(vm); if (vm.count("help")) { cout << "usage: " << program_name << " [options]" << endl; cout << cmdline_options << endl; return 1; } if (vm.count("ndx-file")) { ndx_filename_ptr = ndx_filename.c_str(); } if (!optsOK) { return -1; } cout << "Running with the following options:" << endl; cout << "threads = " << nthreads << endl; cout << "topology-file = " << top_filename << endl; cout << "xtc-file = " << xtc_filename << endl; cout << "ndx-file = " << ndx_filename << endl; cout << "index-file = " << index_filename << endl; cout << "data-file = " << data_filename << endl; cout << endl; // Local vars int step = 1; float time = 0.0; matrix box; float prec = 0.001; char buf[256]; t_topology top; int ePBC; int natoms = 0; int nframes= 0; int update_interval = 1; t_fileio *ref_file; rvec *mycoords = NULL; gmx_bool bOK = 1; double *contact = NULL; vector<coord_array> *ref_coords = NULL; ::real *weights = NULL; int gnx1,gnx2; atom_id *index1,*index2; char *grpname1,*grpname2; ofstream index; ofstream data; // Remove C stdout (silly GROMACS warnings going every which stream!) int myout = dup(1); dup2(2,1); // Setup threads omp_set_num_threads(nthreads); // Get number of atoms and check xtc cout << "Reading topology information from " << top_filename << " ... "; read_tps_conf(top_filename.c_str(), buf, &top, &ePBC, &mycoords, NULL, box, TRUE); cout << "done." 
<< endl; delete [] mycoords; ref_file = open_xtc(xtc_filename.c_str(),"r"); read_first_xtc(ref_file,&natoms, &step, &time, box, &mycoords, &prec, &bOK); close_xtc(ref_file); if (natoms != top.atoms.nr) { cout << "*** ERROR ***" << endl; cout << "Number of atoms in topology file (" << top.atoms.nr << ") " << "does not match the number of atoms " << "in the XTC file (" << xtc_filename << " : " << natoms << ")." << endl; exit(4); } // Get atom selections cout << "Please select two (non-overlapping) groups for contact profiling..." << endl; get_index(&top.atoms,ndx_filename_ptr,1,&gnx1,&index1,&grpname1); cout << endl; get_index(&top.atoms,ndx_filename_ptr,1,&gnx2,&index2,&grpname2); cout << endl; cout << "Total grid size is " << gnx1 << " x " << gnx2 << " = " << (gnx1*gnx2) << endl; // Read coordinates and weight-center all structures cout << "Reading reference coordinates from file: " << xtc_filename << " ... "; ref_coords = new vector<coord_array>; ref_file = open_xtc(xtc_filename.c_str(),"r"); mycoords = new rvec[natoms]; while (read_next_xtc(ref_file, natoms, &step, &time, box, mycoords, &prec, &bOK)) { ref_coords->push_back(mycoords); mycoords = new rvec[natoms]; } close_xtc(ref_file); delete [] mycoords; mycoords = NULL; nframes = ref_coords->size(); cout << "done." << endl; // Allocate vectors for storing the distances for a structure contact = new double[gnx1*gnx2]; weights = new ::real[gnx1*gnx2]; for (int x = 0; x < natoms; x++) weights[x] = top.atoms.atom[x].m; #pragma omp parallel for for (int i = 0; i < gnx1; i++) for (int j = 0; j < gnx2; j++) { weights[(i*gnx2)+j] = top.atoms.atom[index1[i]].m * top.atoms.atom[index2[j]].m; } // Restore C stdout. 
dup2(myout,1); index.open(index_filename.c_str()); data.open(data_filename.c_str()); // Timer for ETA time_t start = std::time(0); time_t last = start; // Compute fits for (int frame = 0; frame < nframes; frame++) { // Update user of progress if (std::time(0) - last > update_interval) { last = std::time(0); time_t eta = start + ((last-start) * nframes / frame); cout << "\rFrame: " << frame << ", will finish " << string(std::ctime(&eta)).substr(0,20); cout.flush(); } // Do Work #pragma omp parallel for for (int i = 0; i < gnx1*gnx2; i++) contact[i] = 0.0; #pragma omp parallel for for (int i = 0; i < gnx1; i++) { int ii = index1[i]; for (int j = 0; j < gnx2; j++) { int jj = index2[j]; double d = 0.0; for (int k = 0; k < 3; k++) d += (((*ref_coords)[frame][ii][k] - (*ref_coords)[frame][jj][k]) * ((*ref_coords)[frame][ii][k] - (*ref_coords)[frame][jj][k])); d = sqrt(d) * 10.0; // d = exp(-(d*d) / (2.0 * weights[(i*gnx2)+j])); // if (d > eps) // contact[(i*gnx2)+j] = d; if (d < eps) contact[(i*gnx2)+j] = 1.0; } // j } // i double sum = 0.0; #pragma omp parallel for reduction(+:sum) for (int i = 0; i < gnx1*gnx2; i++) sum += contact[i]; sum = 1.0; // No normalization... int total = 0; #pragma omp parallel for reduction(+:total) for (int i = 0; i < gnx1*gnx2; i++) if (contact[i] > 0) { contact[i] /= sum; total++; } index.write((char*) &total, sizeof(int) / sizeof(char)); for (int i = 0; i < gnx1*gnx2; i++) if (contact[i] > 0.0) { index.write((char*) &i, sizeof(int) / sizeof(char)); data.write((char*) &contact[i], sizeof(double) / sizeof(char)); } // cout << frame << " " << total << endl; } // frame cout << endl << endl; index.close(); data.close(); // Clean coordinates for (vector<coord_array>::iterator itr = ref_coords->begin(); itr != ref_coords->end(); itr++) delete [] (*itr); delete ref_coords; delete [] contact; delete [] weights; return 0; }
int main(int argc,char* argv[]) { PlasmaData pdata(argc,argv); gnuplot_ctrl* plot; gnuplot_ctrl* plot_anim; plot = gnuplot_init(); plot_anim = gnuplot_init(); gnuplot_setstyle(plot,"lines"); gnuplot_setstyle(plot_anim,"points"); gnuplot_cmd(plot_anim,"set term gif animate nooptimize size 1280,1280 xffffffff"); gnuplot_cmd(plot_anim,"set output \"particles.gif\""); gnuplot_cmd(plot_anim,"set xrange [-1:1]"); gnuplot_cmd(plot_anim,"set yrange [-1:1]"); float xmin = 0; float ymin = 0; float zmin = 0; float Lx = 5.0; float Ly = 5.0; float Lz = 5.0; int nx = 64; int ny = 64; int nz = 64; int nspecies = 1; const float dt = 0.01; const float dtau0 = 0.1; const int nptcls = 500; const int steps = 200; int iptcl[nptcls]; float Ey = 5.0; float Bz = 100.0; pdata.nx = nx; pdata.ny = ny; pdata.nz = nz; pdata.Lx = Lx; pdata.Ly = Ly; pdata.Lz = Lz; pdata.xmin = xmin; pdata.ymin = ymin; pdata.zmin = zmin; pdata.epsilon_a = 1.0e-4; pdata.epsilon_r = 1.0e-10; pdata.dt = dt; pdata.niter_max = 20; pdata.nSubcycle_max = 1000; pdata.Bmag_avg = 1.0; pdata.ndimensions = 3; pdata.setup(); FieldDataCPU fields; ParticleListCPU particles; HOMoments* moments; int numprocs = omp_get_num_procs(); moments = (HOMoments*)malloc(numprocs*sizeof(HOMoments)); for(int i=0;i<numprocs;i++) { moments[i] = *new HOMoments(&pdata); } float x_plot[nptcls][steps]; float y_plot[nptcls][steps]; float gx_plot[nptcls][steps]; float gy_plot[nptcls][steps]; float error_array[nptcls]; //float x_plot_a[nptcls]; //float y_plot_a[nptcls]; fields.allocate(&pdata); particles.allocate(nptcls); fields.dx = pdata.dxdi; fields.dy = pdata.dydi; fields.dz = pdata.dzdi; particles.ispecies = 0; for(int i=0;i<nptcls;i++) { iptcl[i] = i; particles.px[i] = rand()%10000/10000.0; particles.py[i] = rand()%10000/10000.0; particles.pz[i] = 0.5; particles.ix[i] = nx/2; particles.iy[i] = ny/2; particles.iz[i] = nz/2; particles.vx[i] = 0.5*(2*(rand()%10000))/10000.0 + 0.5; particles.vy[i] = 0.5*(2*(rand()%10000))/10000.0 + 0.5; 
particles.vz[i] = 0.0* (rand()%50000 / 50000.0f - 0.5); error_array[i] = 0; } // Setup E-field for(int i=0;i<nx;i++) { for(int j=0;j<ny;j++) { for(int k=0;k<nz;k++) { float x = i*pdata.dxdi+xmin; float y = j*pdata.dydi+ymin; float z = k*pdata.dzdi+zmin; float Ex = -1.0*x; fields.getE(i,j,k,0) = 0; fields.getE(i,j,k,1) = Ey; fields.getE(i,j,k,2) = 0; fields.getB(i,j,k,0) = 0; fields.getB(i,j,k,1) = 0; fields.getB(i,j,k,2) = Bz; // printf("fields(%i,%i,%i) = %f, %f, %f\n",i,j,k, // fields.getE(i,j,k,0),fields.getE(i,j,k,1),fields.getE(i,j,k,2)); } } } fields.q2m[0] = 1.0; printf("Efield setup complete\n"); float time; double avg_error = 0.0; int n_error = 0; CPUTimer timer; moments->init_plot(); timer.start(); for(int i=0;i<steps;i++) { //time = dtau0*(i); //moments.set_vals(0); particles.push(&pdata,&fields,moments); printf("finished step %i\n",i); for(int j=0;j<nptcls;j++) { float px,py,gx,gy; float rl; float vx,vy,vxy,vz,vxyz; float vgx,vgy; float verror; px = (particles.px[j] + particles.ix[j])*pdata.dxdi + pdata.xmin; py = (particles.py[j] + particles.iy[j])*pdata.dydi + pdata.ymin; vx = particles.vx[j]; vy = particles.vy[j]; vz = particles.vz[j]; vxy = sqrt(vx*vx+vy*vy); vxyz = sqrt(vxy*vxy + vz*vz); rl = vxy/Bz; gx = vy*Bz/sqrt(vx*Bz*vx*Bz + vy*Bz*vy*Bz)*rl + px; gy = -vx*Bz/sqrt(vx*Bz*vx*Bz + vy*Bz*vy*Bz)*rl + py; x_plot[j][i] = px; y_plot[j][i] = py; gx_plot[j][i] = gx; gy_plot[j][i] = gy; if(i >= 1) { vgx = (gx_plot[j][i] - gx_plot[j][0])/(dt*(i)); vgy = (gy_plot[j][i] - gy_plot[j][0])/(dt*(i)); verror = fabs(Ey/Bz - vgx)/(Ey/Bz); error_array[j] = fmax(error_array[j],verror); avg_error += verror; n_error ++; // printf("true[%i] v = %e, %e actual v = %e, %e, error = %e\n", // j,Ey/Bz,0.0f,vgx,vgy,verror); } } //if((i+1)%64 == 0) //gnuplot_resetplot(plot_anim); /* float diff_avg = 0.0; for(int j=0;j<nptcls;j++) { x_plot[j][i] = (particles.px[j] + particles.ix[j])*pdata.dxdi + pdata.xmin; y_plot[j][i] = (particles.py[j] + particles.iy[j])*pdata.dydi + 
pdata.ymin; //printf("particle %i with position %f, %f\n",j,x_plot[j][i],y_plot[j][i]); // x_plot_a[j] = x_plot[j][i]; // y_plot_a[j] = y_plot[j][i]; } */ //avg_error += diff_avg / steps; //gnuplot_plot_xy(plot_anim,x_plot_a,y_plot_a,nptcls,NULL); } timer.stop(); printf("average error = %e \n",avg_error/((float)n_error)); printf("Run did %f particles per second\n",nptcls*steps/(timer.diff()*1.0e-3)); for(int j=0;j<nptcls;j++) { if(error_array[j] >= 1.0e-2) gnuplot_plot_xy(plot,x_plot[j],y_plot[j],steps,NULL); } //moments->plot(nz/2,0,HOMoments_currentx); printf("Press 'Enter' to continue\n"); getchar(); moments->close_plot(); gnuplot_close(plot); gnuplot_close(plot_anim); }
int main () /******************************************************************************/ /* Purpose: MAIN is the main program for MD_OPENMP. Discussion: MD implements a simple molecular dynamics simulation. The program uses Open MP directives to allow parallel computation. The velocity Verlet time integration scheme is used. The particles interact with a central pair potential. Output of the program is saved in the TNG format, which is why this code is included in the TNG API release. The high-level API of the TNG API is used where appropriate. Licensing: This code is distributed under the GNU LGPL license. Modified: 8 Jan 2013 Author: Original FORTRAN77 version by Bill Magro. C version by John Burkardt. TNG trajectory output by Magnus Lundborg. Parameters: None */ { float *acc; float *box; float *box_shape; float dt = 0.0002; float e0; float *force; int i; float kinetic; float mass = 1.0; int nd = 3; int np = 50; float *pos; float potential; int proc_num; int seed = 123456789; int step; int step_num = 50000; int step_print; int step_print_index; int step_print_num; int step_save; float *vel; float wtime; tng_trajectory_t traj; tng_molecule_t molecule; tng_chain_t chain; tng_residue_t residue; tng_atom_t atom; timestamp ( ); proc_num = omp_get_num_procs ( ); acc = ( float * ) malloc ( nd * np * sizeof ( float ) ); box = ( float * ) malloc ( nd * sizeof ( float ) ); box_shape = (float *) malloc (9 * sizeof (float)); force = ( float * ) malloc ( nd * np * sizeof ( float ) ); pos = ( float * ) malloc ( nd * np * sizeof ( float ) ); vel = ( float * ) malloc ( nd * np * sizeof ( float ) ); printf ( "\n" ); printf ( "MD_OPENMP\n" ); printf ( " C/OpenMP version\n" ); printf ( "\n" ); printf ( " A molecular dynamics program.\n" ); printf ( "\n" ); printf ( " NP, the number of particles in the simulation is %d\n", np ); printf ( " STEP_NUM, the number of time steps, is %d\n", step_num ); printf ( " DT, the size of each time step, is %f\n", dt ); printf ( "\n" ); printf 
( " Number of processors available = %d\n", proc_num ); printf ( " Number of threads = %d\n", omp_get_max_threads ( ) ); printf("\n"); printf(" Initializing trajectory storage.\n"); /* Initialize the TNG trajectory */ tng_util_trajectory_open(TNG_EXAMPLE_FILES_DIR "tng_md_out.tng", 'w', &traj); /* Set molecules data */ /* N.B. This is still not done using utility functions. The low-level API * is used. */ printf(" Creating molecules in trajectory.\n"); tng_molecule_add(traj, "water", &molecule); tng_molecule_chain_add(traj, molecule, "W", &chain); tng_chain_residue_add(traj, chain, "WAT", &residue); if(tng_residue_atom_add(traj, residue, "O", "O", &atom) == TNG_CRITICAL) { tng_util_trajectory_close(&traj); printf(" Cannot create molecules.\n"); exit(1); } tng_molecule_cnt_set(traj, molecule, np); /* Set the dimensions of the box. */ for(i = 0; i < 9; i++) { box_shape[i] = 0.0; } for ( i = 0; i < nd; i++ ) { box[i] = 10.0; /* box_shape stores 9 values according to the TNG specs */ box_shape[i*nd + i] = box[i]; } printf ( "\n" ); printf ( " Initializing positions, velocities, and accelerations.\n" ); /* Set initial positions, velocities, and accelerations. */ initialize ( np, nd, box, &seed, pos, vel, acc ); /* Compute the forces and energies. */ printf ( "\n" ); printf ( " Computing initial forces and energies.\n" ); compute ( np, nd, pos, vel, mass, force, &potential, &kinetic ); e0 = potential + kinetic; /* Saving frequency */ step_save = 400; step_print = 0; step_print_index = 0; step_print_num = 10; /* This is the main time stepping loop: Compute forces and energies, Update positions, velocities, accelerations. 
*/ printf(" Every %d steps box shape, particle positions, velocities and forces are\n", step_save); printf(" saved to a TNG trajectory file.\n"); printf ( "\n" ); printf ( " At certain step intervals, we report the potential and kinetic energies.\n" ); printf ( " The sum of these energies should be a constant.\n" ); printf ( " As an accuracy check, we also print the relative error\n" ); printf ( " in the total energy.\n" ); printf ( "\n" ); printf ( " Step Potential Kinetic (P+K-E0)/E0\n" ); printf ( " Energy P Energy K Relative Energy Error\n" ); printf ( "\n" ); step = 0; printf ( " %8d %14f %14f %14e\n", step, potential, kinetic, ( potential + kinetic - e0 ) / e0 ); step_print_index++; step_print = ( step_print_index * step_num ) / step_print_num; /* Set the output frequency of box shape, positions, velocities and forces */ if(tng_util_box_shape_write_frequency_set(traj, step_save) != TNG_SUCCESS) { printf("Error setting writing frequency data. %s: %d\n", __FILE__, __LINE__); exit(1); } if(tng_util_pos_write_frequency_set(traj, step_save) != TNG_SUCCESS) { printf("Error setting writing frequency data. %s: %d\n", __FILE__, __LINE__); exit(1); } if(tng_util_vel_write_frequency_set(traj, step_save) != TNG_SUCCESS) { printf("Error setting writing frequency data. %s: %d\n", __FILE__, __LINE__); exit(1); } if(tng_util_force_write_frequency_set(traj, step_save) != TNG_SUCCESS) { printf("Error setting writing frequency data. %s: %d\n", __FILE__, __LINE__); exit(1); } /* Write the first frame of box shape, positions, velocities and forces */ if(tng_util_box_shape_write(traj, 0, box_shape) != TNG_SUCCESS) { printf("Error writing box shape. %s: %d\n", __FILE__, __LINE__); exit(1); } if(tng_util_pos_write(traj, 0, pos) != TNG_SUCCESS) { printf("Error adding data. %s: %d\n", __FILE__, __LINE__); exit(1); } if(tng_util_vel_write(traj, 0, vel) != TNG_SUCCESS) { printf("Error adding data. 
%s: %d\n", __FILE__, __LINE__); exit(1); } if(tng_util_force_write(traj, 0, force) != TNG_SUCCESS) { printf("Error adding data. %s: %d\n", __FILE__, __LINE__); exit(1); } wtime = omp_get_wtime ( ); for ( step = 1; step < step_num; step++ ) { compute ( np, nd, pos, vel, mass, force, &potential, &kinetic ); if ( step == step_print ) { printf ( " %8d %14f %14f %14e\n", step, potential, kinetic, ( potential + kinetic - e0 ) / e0 ); step_print_index++; step_print = ( step_print_index * step_num ) / step_print_num; } if(step % step_save == 0) { /* Write box shape, positions, velocities and forces */ if(tng_util_box_shape_write(traj, step, box_shape) != TNG_SUCCESS) { printf("Error writing box shape. %s: %d\n", __FILE__, __LINE__); exit(1); } if(tng_util_pos_write(traj, step, pos) != TNG_SUCCESS) { printf("Error adding data. %s: %d\n", __FILE__, __LINE__); break; } if(tng_util_vel_write(traj, step, vel) != TNG_SUCCESS) { printf("Error adding data. %s: %d\n", __FILE__, __LINE__); break; } if(tng_util_force_write(traj, step, force) != TNG_SUCCESS) { printf("Error adding data. %s: %d\n", __FILE__, __LINE__); break; } } update ( np, nd, pos, vel, force, acc, mass, dt ); } wtime = omp_get_wtime ( ) - wtime; printf ( "\n" ); printf ( " Elapsed time for main computation:\n" ); printf ( " %f seconds.\n", wtime ); free ( acc ); free ( box ); free ( box_shape ); free ( force ); free ( pos ); free ( vel ); /* Close the TNG output. */ tng_util_trajectory_close(&traj); printf ( "\n" ); printf ( "MD_OPENMP\n" ); printf ( " Normal end of execution.\n" ); printf ( "\n" ); timestamp ( ); return 0; }
int main( int argc, char *argv[] ) { // try { time_t programStartTime(time(NULL)); boost::filesystem::path workingDir( boost::filesystem::current_path()); // ========== PROGRAM PARAMETERS ========== std::string progName("partitiontree"); std::string configFilename("../../config/"+progName+".cfg"); unsigned int threads(0), levelDepth(3), filterRadius(0); bool verbose(false), niftiMode( true ); // program parameters std::string treeFilename, outputFolder; // Declare a group of options that will be allowed only on command line boost::program_options::options_description genericOptions("Generic options"); genericOptions.add_options() ( "version", "Program version" ) ( "help,h", "Produce extended program help message" ) ( "tree,t", boost::program_options::value< std::string >(&treeFilename), "file with the tree to compute partitions from") ( "outputf,O", boost::program_options::value< std::string >(&outputFolder), "output folder where partition files will be written") ( "search-depth,d", boost::program_options::value< unsigned int >(&levelDepth)->implicit_value(3), "[opt] optimal partition search depth (default = 3)") ( "filter-radius,r", boost::program_options::value< unsigned int >(&filterRadius)->implicit_value(0), "[opt] output partition filter kernel radius (default = 0 | no filtering)") ( "hoz", "[opt] obtain horizontal cut partitions (instead of Spread-Separation ones)") ( "maxgran,m", "[opt] obtain only the maximum granularity partition") ; // Declare a group of options that will be allowed both on command line and in config file boost::program_options::options_description configOptions("Configuration"); configOptions.add_options() ( "verbose,v", "[opt] verbose output." ) ( "vista", "[opt] use vista file format (default is nifti)." 
) ( "pthreads,p", boost::program_options::value< unsigned int >(&threads), "[opt] number of processing threads to run the program in parallel, default: all available") ; // Hidden options, will be allowed both on command line and in config file, but will not be shown to the user. boost::program_options::options_description hiddenOptions("Hidden options"); //hiddenOptions.add_options() ; boost::program_options::options_description cmdlineOptions; cmdlineOptions.add(genericOptions).add(configOptions).add(hiddenOptions); boost::program_options::options_description configFileOptions; configFileOptions.add(configOptions).add(hiddenOptions); boost::program_options::options_description visibleOptions("Allowed options"); visibleOptions.add(genericOptions).add(configOptions); boost::program_options::positional_options_description posOpt; //this arguments do not need to specify the option descriptor when typed in //posOpt.add("roi-file", -1); boost::program_options::variables_map variableMap; store(boost::program_options::command_line_parser(argc, argv).options(cmdlineOptions).positional(posOpt).run(), variableMap); std::ifstream ifs(configFilename.c_str()); store(parse_config_file(ifs, configFileOptions), variableMap); notify(variableMap); if (variableMap.count("help")) { std::cout << "---------------------------------------------------------------------------" << std::endl; std::cout << std::endl; std::cout << " Project: hClustering" << std::endl; std::cout << std::endl; std::cout << " Whole-Brain Connectivity-Based Hierarchical Parcellation Project" << std::endl; std::cout << " David Moreno-Dominguez" << std::endl; std::cout << " [email protected]" << std::endl; std::cout << " [email protected]" << std::endl; std::cout << " www.cbs.mpg.de/~moreno" << std::endl; std::cout << std::endl; std::cout << " For more reference on the underlying algorithm and research they have been used for refer to:" << std::endl; std::cout << " - Moreno-Dominguez, D., Anwander, A., & Knösche, T. 
R. (2014)." << std::endl; std::cout << " A hierarchical method for whole-brain connectivity-based parcellation." << std::endl; std::cout << " Human Brain Mapping, 35(10), 5000-5025. doi: http://dx.doi.org/10.1002/hbm.22528" << std::endl; std::cout << " - Moreno-Dominguez, D. (2014)." << std::endl; std::cout << " Whole-brain cortical parcellation: A hierarchical method based on dMRI tractography." << std::endl; std::cout << " PhD Thesis, Max Planck Institute for Human Cognitive and Brain Sciences, Leipzig." << std::endl; std::cout << " ISBN 978-3-941504-45-5" << std::endl; std::cout << std::endl; std::cout << " hClustering is free software: you can redistribute it and/or modify" << std::endl; std::cout << " it under the terms of the GNU Lesser General Public License as published by" << std::endl; std::cout << " the Free Software Foundation, either version 3 of the License, or" << std::endl; std::cout << " (at your option) any later version." << std::endl; std::cout << " http://creativecommons.org/licenses/by-nc/3.0" << std::endl; std::cout << std::endl; std::cout << " hClustering is distributed in the hope that it will be useful," << std::endl; std::cout << " but WITHOUT ANY WARRANTY; without even the implied warranty of" << std::endl; std::cout << " MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the" << std::endl; std::cout << " GNU Lesser General Public License for more details." << std::endl; std::cout << std::endl; std::cout << "---------------------------------------------------------------------------" << std::endl << std::endl; std::cout << "partitiontree" << std::endl << std::endl; std::cout << "Obtain tree partitions at all granularity levels using the Spread-Separation method (finding the the partition with highest SS index at each granularity)." << std::endl; std::cout << " Optimal SS value for each partition is searched within a defined search-depth hierarchical levels. Final partitions can be filtered with a defined kernel size." 
<< std::endl; std::cout << " to keep local SS maxima within that kernel. For SS index refer to (Moreno-Dominguez, 2014)" << std::endl; std::cout << " For an interactive 3D partition management with more options please use the Hierarchcial Clustering module developed in OpenWalnut (www.openwalnut.org)." << std::endl << std::endl; std::cout << "* Arguments:" << std::endl << std::endl; std::cout << " --version: Program version." << std::endl << std::endl; std::cout << " -h --help: produce extended program help message." << std::endl << std::endl; std::cout << " -t --tree: File with the hierarchical tree to extract partitions from." << std::endl << std::endl; std::cout << " -O --outputf: Output folder where partition files will be written." << std::endl << std::endl; std::cout << "[-d --search-depth]: Search optimal partition for each granularity within d hierarchical levels." << std::endl; std::cout << " A higher value will produce more optimized partition but will increase computing time." << std::endl; std::cout << " Default: 3. Recommendened values: 3 for good quality and fast computation, 4 for enhanced quality." << std::endl << std::endl; std::cout << "[-r --filter-radius]: Filter output partitions to keep only local SS (partition quality) maxima" << std::endl; std::cout << " within a r-sized kernel across the granularity dimension." << std::endl << std::endl; std::cout << "[-h --hoz]: Write horizontal cut partitions instead of SS ones (optimal partition search is still based on SS index)." << std::endl << std::endl; std::cout << "[-m --maxgran]: Compute and write only the maximum granularity (meta-leaves) partition." << std::endl << std::endl; std::cout << "[-v --verbose]: verbose output (recommended)." << std::endl << std::endl; std::cout << "[--vista]: write output tree in vista coordinates (default is nifti)." << std::endl << std::endl; std::cout << "[-p --pthreads]: number of processing threads to run the program in parallel. 
Default: use all available processors." << std::endl << std::endl; std::cout << std::endl; std::cout << "* Usage example:" << std::endl << std::endl; std::cout << " partitiontree -t tree_lh.txt -O results/ -d 3 -r 50 -v" << std::endl << std::endl; std::cout << std::endl; std::cout << "* Outputs (in output folder defined at option -O):" << std::endl << std::endl; std::cout << " (default outputs)" << std::endl; std::cout << " - 'allSSparts_dX.txt' - (where X is the search depth level defined at parameter -d) Contains a summary of the partition information (cut value and size) for all granularities." << std::endl; std::cout << " - 'TREE_SSparts_dX.txt' - (where TREE is the filename of the input tree defined at parameter -t) contains a copy of the original tree file with the partitions at all granularities included in the relevant fields." << std::endl; std::cout << " - 'partitiontree_log.txt' - A text log file containing the parameter details and in-run and completion information of the program." << std::endl; std::cout << std::endl; std::cout << " (additional if using option -r)" << std::endl; std::cout << " - 'filtSSparts_dX_rY.txt' - (where Y is the filter radius defined at parameter -r) Contains a summary of the resulting filtered partitions." << std::endl; std::cout << " - 'TREE_SSparts_dX_rY.txt' - contains a copy of the original tree file with the resulting filtered partitions included in the relevant fields." << std::endl; std::cout << std::endl; std::cout << " (when using --hoz option, the prefix 'SS' will be replaced by 'Hoz'')" << std::endl; std::cout << std::endl; std::cout << " (alternative outputs when using option --maxgran)" << std::endl; std::cout << " - 'fmaxgranPart.txt' - Contains the size information of the resulting maximal granularity partition for that tree." << std::endl; std::cout << " - 'TREE_maxgranPart.txt' - contains a copy of the original tree file with the resulting max granularity partition included in the relevant fields." 
<< std::endl; std::cout << std::endl; exit(0); } if (variableMap.count("version")) { std::cout << progName <<", version 2.0"<<std::endl; exit(0); } if (variableMap.count("verbose")) { std::cout << "verbose output"<<std::endl; verbose=true; } if (variableMap.count("pthreads")) { if (threads==1) { std::cout <<"Using a single processor"<< std::endl; } else if(threads==0 || threads>=omp_get_num_procs()) { threads = omp_get_num_procs(); std::cout <<"Using all available processors ("<< threads <<")." << std::endl; } else { std::cout <<"Using a maximum of "<< threads <<" processors "<< std::endl; } omp_set_num_threads( threads ); } else { threads = omp_get_num_procs(); omp_set_num_threads( threads ); std::cout <<"Using all available processors ("<< threads <<")." << std::endl; } if ( variableMap.count( "vista" ) ) { if( verbose ) { std::cout << "Using vista format" << std::endl; } fileManagerFactory fmf; fmf.setVista(); niftiMode = false; } else { if( verbose ) { std::cout << "Using nifti format" << std::endl; } fileManagerFactory fmf; fmf.setNifti(); niftiMode = true; } if (variableMap.count("tree")) { if(!boost::filesystem::is_regular_file(boost::filesystem::path(treeFilename))) { std::cerr << "ERROR: tree file \""<<treeFilename<<"\" is not a regular file"<<std::endl; std::cerr << visibleOptions << std::endl; exit(-1); } std::cout << "Roi voxels file: "<< treeFilename << std::endl; } else { std::cerr << "ERROR: no tree file stated"<<std::endl; std::cerr << visibleOptions << std::endl; exit(-1); } if (variableMap.count("outputf")) { if(!boost::filesystem::is_directory(boost::filesystem::path(outputFolder))) { std::cerr << "ERROR: output folder \""<<outputFolder<<"\" is not a directory"<<std::endl; std::cerr << visibleOptions << std::endl; exit(-1); } std::cout << "Output folder: "<< outputFolder << std::endl; } else { std::cerr << "ERROR: no output folder stated"<<std::endl; std::cerr << visibleOptions << std::endl; exit(-1); } if (variableMap.count("maxgran")) { 
std::cout<<"Obtaining only max. granularity partition..."<<std::endl; WHtree tree(treeFilename); std::cout<<tree.getReport( false )<<std::endl; if( tree.testRootBaseNodes() ) { std::vector<size_t > maxpart( tree.getRootBaseNodes() ); std::vector<std::vector<size_t > > partitionVector( 1, maxpart); std::vector<float > partitionValues(1,0); std::cout<<"maxgranpart size: "<<std::endl<<maxpart.size()<<std::endl; WHtreePartition partitioner(&tree); std::string outPartFilename( outputFolder + "/maxgranPart.txt" ); partitioner.writePartitionSet( outPartFilename, partitionValues,partitionVector); tree.insertPartitions( partitionVector, partitionValues ); std::string outTreeFilename( outputFolder + "/" + tree.getName() + "_maxgranPart" ); outTreeFilename += ( ".txt" ); tree.writeTree( outTreeFilename, niftiMode ); return 0; } else { std::cout<<"ERROR: tree does not have a maximum granularity meta-leaf partition"<<std::endl; return(-1); } } if( levelDepth > 5 ) { std::cout << "Level depth indicated: " << levelDepth << " is too high, setting to a maximum of 5" << std::endl; levelDepth = 5; } std::cout << "Using a search depth of: " << levelDepth << std::endl; if( filterRadius > 1000 ) { std::cout << "filter radius indicated: " << filterRadius << " is too high (max is 1000), setting to 100" << std::endl; filterRadius = 10; } if( filterRadius == 0 ) { std::cout << "using no filtering (radius 0)" << std::endl; } else if( filterRadius < 0 ) { std::cout << "filter radius indicated: " << filterRadius << " must be positive. 
using no filtering (radius 0)" << std::endl; filterRadius = 0; } else { std::cout << "Using a filter radius of: " << filterRadius << std::endl; } ///////////////////////////////////////////////////////////////// std::string logFilename(outputFolder+"/"+progName+"_log.txt"); std::ofstream logFile(logFilename.c_str()); if(!logFile) { std::cerr << "ERROR: unable to open log file: \""<<logFilename<<"\""<<std::endl; exit(-1); } logFile <<"Start Time:\t"<< ctime(&programStartTime) <<std::endl; logFile <<"Working directory:\t"<< workingDir.string() <<std::endl; logFile <<"Verbose:\t"<< verbose <<std::endl; logFile <<"Tree file:\t"<< treeFilename <<std::endl; logFile <<"Output folder:\t"<< outputFolder <<std::endl; logFile <<"Verbose:\t"<< verbose <<std::endl; if( niftiMode ) { logFile << "Using nifti file format" << std::endl; } else { logFile << "Using vista file format" << std::endl; } WHtree tree(treeFilename); logFile << tree.getReport( false ) <<std::endl; std::cout<<tree.getReport( false )<<std::endl; std::vector< float > partitionValues; std::vector< std::vector< size_t> > partitionVector; WHtreePartition treePartition(&tree); std::string prefix; if (variableMap.count("hoz")) { prefix = "Hoz"; std::cout <<"getting hoz partitions at all levels..." <<std::endl; treePartition.scanHozPartitions( &partitionValues, &partitionVector ); std::cout << partitionValues.size() << " Partitions obtained, writing to file..." 
<<std::endl; logFile <<"Initial partitions:\t"<< partitionValues.size() <<std::endl; std::string outPartFilename( outputFolder + "/all" + prefix + "parts.txt" ); treePartition.writePartitionSet( outPartFilename, partitionValues, partitionVector); tree.insertPartitions( partitionVector, partitionValues ); std::string outTreeFilename( outputFolder + "/" + tree.getName() + "_" + prefix + "parts_d" + boost::lexical_cast<std::string>(levelDepth) ); outTreeFilename += ( ".txt" ); tree.writeTree( outTreeFilename, niftiMode ); } else { prefix = "SS"; std::cout <<"getting SS partitions at all levels..." <<std::endl; treePartition.scanOptimalPartitions( levelDepth, &partitionValues, &partitionVector ); std::cout << partitionValues.size() << " Partitions obtained, writing to file..." <<std::endl; logFile <<"Initial partitions:\t"<< partitionValues.size() <<std::endl; std::string outPartFilename( outputFolder + "/all" + prefix + "parts_d" + boost::lexical_cast<std::string>(levelDepth) + ".txt" ); treePartition.writePartitionSet( outPartFilename, partitionValues, partitionVector); tree.insertPartitions( partitionVector, partitionValues ); std::string outTreeFilename( outputFolder + "/" + tree.getName() + "_" + prefix + "parts_d" + boost::lexical_cast<std::string>(levelDepth) ); outTreeFilename += ( ".txt" ); tree.writeTree( outTreeFilename, niftiMode ); } std::vector < unsigned int > filterRadii; //filterRadii.reserve( 6 ); // filterRadii.push_back( 1 ); // filterRadii.push_back( 2 ); // filterRadii.push_back( 5 ); // filterRadii.push_back( 10 ); // filterRadii.push_back( 15 ); // filterRadii.push_back( 20 ); filterRadii.push_back( filterRadius ); for(size_t i=0; i< filterRadii.size(); ++i) { if( filterRadii[i] <= 0 ) { continue; } std::vector< float > filtPartValues( partitionValues ); std::vector< std::vector< size_t> > filtPartVector( partitionVector ); std::cout << "Filtering with a radius of "<< filterRadii[i] << "..." 
<<std::endl; treePartition.filterMaxPartitions( filterRadii[i], &filtPartValues, &filtPartVector ); std::cout << filtPartValues.size() << " Filtered partitions obtained, writing to file..." <<std::endl; logFile <<"Filtered partitions:\t"<< filtPartValues.size() <<std::endl; std::string outPartFilename( outputFolder + "/filt" + prefix + "parts_d" + boost::lexical_cast<std::string>(levelDepth) ); outPartFilename += ( "_r" + boost::lexical_cast<std::string>(filterRadii[i]) + ".txt" ); treePartition.writePartitionSet(outPartFilename, filtPartValues, filtPartVector); std::cout << "Adding filtered partitions to tree and writing..." <<std::endl; std::string outTreeFilename( outputFolder + "/" + tree.getName() + "_" + prefix + "parts_d" + boost::lexical_cast<std::string>(levelDepth) ); outTreeFilename += ( "_r" + boost::lexical_cast<std::string>(filterRadii[i]) + ".txt" ); tree.insertPartitions( filtPartVector, filtPartValues ); tree.writeTree( outTreeFilename, niftiMode ); } ///////////////////////////////////////////////////////////////// // save and print total time time_t programEndTime(time(NULL)); int totalTime( difftime(programEndTime,programStartTime) ); std::cout <<"Program Finished, total time: "<< totalTime/3600 <<"h "<< (totalTime%3600)/60 <<"' "<< ((totalTime%3600)%60) <<"\" "<< std::endl; logFile <<"-------------"<<std::endl; logFile <<"Finish Time:\t"<< ctime(&programEndTime) <<std::endl; logFile <<"Elapsed time : "<< totalTime/3600 <<"h "<< (totalTime%3600)/60 <<"' "<< ((totalTime%3600)%60) <<"\""<< std::endl; // } // catch(std::exception& e) // { // std::cout << e.what() << std::endl; // return 1; // } return 0; }
/******************************************************************************/
/*
  Purpose:

    Helpers and entry point of the RANDOM_OPENMP program.

  Discussion:

    The program looks at a single issue of random number generation in
    parallel code.  A generator driven by an integer seed makes it hard for
    a parallel program to reproduce one exact sequence, and it is not
    obvious whether the OpenMP threads should treat the seed as shared or
    private.  The position taken here is that every thread should own a
    private seed, set to a distinct value before the computation starts.

  Licensing:

    This code is distributed under the GNU LGPL license.

  Modified:

    03 September 2012

  Author:

    John Burkardt
*/

/* Prints the program banner plus the processor and thread counts reported by OpenMP. */
static void random_openmp_print_header ( void )
{
  printf ( "\n" );
  printf ( "RANDOM_OPENMP\n" );
  printf ( " C version\n" );
  printf ( " An OpenMP program using random numbers.\n" );
  printf ( " The random numbers depend on a seed.\n" );
  printf ( " We need to insure that each OpenMP thread\n" );
  printf ( " starts with a different seed.\n" );
  printf ( "\n" );
  printf ( " Number of processors available = %d\n", omp_get_num_procs ( ) );
  printf ( " Number of threads = %d\n", omp_get_max_threads ( ) );
}

/* Prints the closing "normal end of execution" message. */
static void random_openmp_print_footer ( void )
{
  printf ( "\n" );
  printf ( "RANDOM_OPENMP\n" );
  printf ( " Normal end of execution.\n" );
  printf ( "\n" );
}

/*
  MAIN: prints a timestamp and the banner, calls monte_carlo with a count of
  100 and a pointer to a seed initialized to 123456789, then prints the
  closing message and a final timestamp.  Always returns 0.
*/
int main ( void )
{
  int sample_count = 100;
  int start_seed = 123456789;

  timestamp ( );
  random_openmp_print_header ( );

  monte_carlo ( sample_count, &start_seed );

/*
  Terminate.
*/
  random_openmp_print_footer ( );
  timestamp ( );

  return 0;
}