Ejemplo n.º 1
0
/*
 * FT_NLP: tabulate, on a uniform grid of |k| values, the radial integrals
 *
 *     Spe_NLRF_Bessel[so][spe][L][kj] = \int j_GL(k*r) * R(r) * r^2 dr
 *
 * where R(r) is the value returned by Nonlocal_RadialF(spe,L-1,so,r) and
 * j_GL comes from Spherical_Bessel().  (The banner printed below calls this
 * the "Fourier transform of non-local projectors".)
 *
 * Work distribution:
 *   - each MPI rank handles the MSpeciesNum species starting at
 *     Species_Top[myid];
 *   - inside a species, the k points are dealt out to the OpenMP threads
 *     round-robin (kj = OMPID, OMPID+Nthrds, ...);
 *   - afterwards every rank broadcasts the rows it computed so that all
 *     ranks hold the full table.
 *
 * The function also fills NormK[] with the k grid.  It takes no arguments
 * and returns nothing; all inputs and outputs are file/global-scope arrays.
 */
void FT_NLP()
{
  int numprocs,myid,ID,tag=999;
  int count,NumSpe;
  int i,kj,num_k,so;
  int Lspe,spe,L,GL,MaxGL;
  double dk,norm_k;
  double rmin,rmax,r,r2,h,sum[2];
  double **SphB;
  double *tmp_SphB,*tmp_SphBp;
  double TStime, TEtime;
  /* for MPI */
  MPI_Status stat;
  MPI_Request request;
  /* for OpenMP */
  int OMPID,Nthrds,Nprocs;

  /* start time; only consumed by the commented-out report at the end */
  dtime(&TStime);

  /* MPI */
  MPI_Comm_size(mpi_comm_level1,&numprocs);
  MPI_Comm_rank(mpi_comm_level1,&myid);
 
  if (myid==Host_ID) printf("<FT_NLP>          Fourier transform of non-local projectors\n");

  /* loop over the species assigned to this rank */
  for (Lspe=0; Lspe<MSpeciesNum; Lspe++){

    spe = Species_Top[myid] + Lspe;

    /* k grid: num_k points with spacing dk, starting at k=0 */
    num_k = Ngrid_NormK;
    dk = PAO_Nkmax/(double)num_k;
    /* r grid: OneD_Grid equal intervals of width h between rmin and rmax */
    rmin = Spe_VPS_RV[spe][0];
    rmax = Spe_Atom_Cut1[spe] + 0.5;
    h = (rmax - rmin)/(double)OneD_Grid;

    /* kj loop */

    /* every scratch variable used in the region is listed as private, so
       each thread allocates and frees its own SphB / tmp_SphB buffers */
#pragma omp parallel shared(Spe_VPS_List,spe,Spe_Num_RVPS,num_k,dk,OneD_Grid,rmin,h,VPS_j_dependency,Spe_NLRF_Bessel)  private(MaxGL,L,GL,SphB,tmp_SphB,tmp_SphBp,OMPID,Nthrds,Nprocs,norm_k,i,r,r2,sum,so,kj)
    {

      /* allocate SphB */

      /* MaxGL = largest entry of Spe_VPS_List[spe][1..Spe_Num_RVPS[spe]];
         it stays -1 when the species has no entries */
      MaxGL = -1;
      for (L=1; L<=Spe_Num_RVPS[spe]; L++){
	GL = Spe_VPS_List[spe][L];
	if (MaxGL<GL) MaxGL = GL;
      }      

      /* SphB[GL][i]: (MaxGL+3) rows of (OneD_Grid+1) radial samples.
         NOTE(review): the malloc results are not checked. */
      SphB = (double**)malloc(sizeof(double*)*(MaxGL+3));
      for(GL=0; GL<(MaxGL+3); GL++){ 
	SphB[GL] = (double*)malloc(sizeof(double)*(OneD_Grid+1));
      }

      tmp_SphB  = (double*)malloc(sizeof(double)*(MaxGL+3));
      tmp_SphBp = (double*)malloc(sizeof(double)*(MaxGL+3));

      /* get info. on OpenMP */ 

      OMPID = omp_get_thread_num();
      Nthrds = omp_get_num_threads();
      Nprocs = omp_get_num_procs();

      /* round-robin split: thread OMPID takes kj = OMPID, OMPID+Nthrds, ... */
      for ( kj=OMPID; kj<num_k; kj+=Nthrds ){

	norm_k = (double)kj*dk;

	/* calculate SphB */

	/* SphB[GL][i] = tmp_SphB[GL] * r^2 at r = rmin + i*h, where tmp_SphB
	   is filled by Spherical_Bessel(norm_k*r, MaxGL, ...) */
	for (i=0; i<=OneD_Grid; i++){

	  r = rmin + (double)i*h;

	  Spherical_Bessel(norm_k*r,MaxGL,tmp_SphB,tmp_SphBp);

	  r2 = r*r;
	  for(GL=0; GL<=MaxGL; GL++){ 
	    SphB[GL][i] = tmp_SphB[GL]*r2; 
	  }
	}

	/* halve the two end points: trapezoidal-rule weights for the sum below */
	for(GL=0; GL<=MaxGL; GL++){ 
	  SphB[GL][0] *= 0.5;
	  SphB[GL][OneD_Grid] *= 0.5;
	}

	/* loop for L */

	for (L=1; L<=Spe_Num_RVPS[spe]; L++){

	  GL = Spe_VPS_List[spe][L];

	  /****************************************************
                      \int jL(k*r)*RL*r^2 dr 
	  ****************************************************/

	  sum[0] = 0.0;
	  sum[1] = 0.0;

	  /* accumulate one sum per so = 0..VPS_j_dependency[spe] */
	  for (i=0; i<=OneD_Grid; i++){
	    r = rmin + (double)i*h;
	    for (so=0; so<=VPS_j_dependency[spe]; so++){
	      sum[so] += Nonlocal_RadialF(spe,L-1,so,r)*SphB[GL][i];
	    }
	  }

	  /* multiply by the grid spacing and store; each thread writes a
	     distinct kj column, so the threads do not overlap */
	  for (so=0; so<=VPS_j_dependency[spe]; so++){
	    Spe_NLRF_Bessel[so][spe][L][kj] = sum[so]*h;
	  }

	} /* L */
      } /* kj */

      /* free arrays */

      for(GL=0; GL<(MaxGL+3); GL++){ 
	free(SphB[GL]);
      }
      free(SphB);

      free(tmp_SphB);
      free(tmp_SphBp);

#pragma omp flush(Spe_NLRF_Bessel)

    } /* #pragma omp parallel */

  } /* Lspe */

  /****************************************************
     Remedy for MSpeciesNum==0
     generate radial grids in the k-space
  ****************************************************/

  /* done outside the species loop so NormK is filled even on a rank that
     owns no species */
  dk = PAO_Nkmax/(double)Ngrid_NormK;
  for (i=0; i<Ngrid_NormK; i++){
    NormK[i] = (double)i*dk;
  }

  /***********************************************************
        sending and receiving of Spe_NLRF_Bessel by MPI
  ***********************************************************/

  /* for every rank ID, broadcast (with ID as root) List_YOUSO[15] doubles
     of each [so][spe][L] row belonging to the species
     Species_Top[ID]..Species_End[ID] */
  for (ID=0; ID<Num_Procs2; ID++){
    NumSpe = Species_End[ID] - Species_Top[ID] + 1;
    for (Lspe=0; Lspe<NumSpe; Lspe++){
      spe = Species_Top[ID] + Lspe;
      for (so=0; so<=VPS_j_dependency[spe]; so++){
	for (L=1; L<=Spe_Num_RVPS[spe]; L++){
	  MPI_Bcast(&Spe_NLRF_Bessel[so][spe][L][0],
		    List_YOUSO[15],MPI_DOUBLE,ID,mpi_comm_level1);
	}
      }
    }
  }

  /***********************************************************
                         elapsed time
  ***********************************************************/

  dtime(&TEtime);

  /* debugging aid, disabled: print the elapsed time and stop the program */
  /*
  printf("myid=%2d Elapsed Time (s) = %15.12f\n",myid,TEtime-TStime);
  MPI_Finalize();
  exit(0);
  */

}
Ejemplo n.º 2
0
/*
 * Fit the rate constants ka and kd of the reversible geminate recombination
 * model to the autocorrelation function ct using a GSL Nelder-Mead simplex
 * minimizer on gemFunc_residual2.
 *
 *   ct      measured ACF, nData points (stored in the fit data as y)
 *   time    time axis, nData points; time[1]-time[0] is used as the time step
 *   ctFit   out: receives the theoretical ACF computed with the final ka/kd.
 *           The array is allocated here with snew and is NOT freed by this
 *           function (presumably the caller releases it - confirm).
 *   nData   number of points in ct/time; must be at least 2
 *   params  fit settings; params->D must be > 0.  params->ka and params->kd
 *           are overwritten on every iteration, and params->nFitPoints is
 *           reduced if a reduced-set index falls outside the data.
 *
 * The search starts at ka = 25, kd = 0.5 with steps 0.1 and 0.01 and runs
 * for at most 100 iterations or until the simplex size test with tolerance
 * 1e-10 succeeds.  After each iteration the current theoretical curve is
 * written to "Iter_<iter>.xvg" via dumpN.
 *
 * Returns the minimum reported by the minimizer, or -1 when GSL support is
 * not compiled in, when params->D <= 0, or when nData < 2.
 */
extern real fitGemRecomb(double *ct, double *time, double **ctFit,
			const int nData, t_gemParams *params)
{

  int    nThreads, i, iter, status, maxiter;
  real   size, d2, tol, *dumpdata;
  size_t p, n;
  gemFitData *GD;
  char dumpname[128];

  /* nmsimplex2 had convergence problems prior to gsl v1.14,
   * but it's O(N) instead of O(N^2) operations, so let's use it if v >= 1.14 */
#ifdef HAVE_LIBGSL
  gsl_multimin_fminimizer *s;
  gsl_vector *x,*dx;             /* parameters and initial step size */
  gsl_multimin_function fitFunc;
#ifdef GSL_MAJOR_VERSION
#ifdef GSL_MINOR_VERSION
#if ((GSL_MAJOR_VERSION == 1 && GSL_MINOR_VERSION >= 14) || \
  (GSL_MAJOR_VERSION > 1))
    const gsl_multimin_fminimizer_type *T = gsl_multimin_fminimizer_nmsimplex2;
#else
  const gsl_multimin_fminimizer_type *T = gsl_multimin_fminimizer_nmsimplex;
#endif /* #if ... */
#endif /* GSL_MINOR_VERSION */
#else
  const gsl_multimin_fminimizer_type *T = gsl_multimin_fminimizer_nmsimplex;
#endif /* GSL_MAJOR_VERSION */
  fprintf(stdout, "Will fit ka and kd to the ACF according to the reversible geminate recombination model.\n");
#else  /* HAVE_LIBGSL */
  fprintf(stderr, "Sorry, can't do reversible geminate recombination without gsl. "
	 "Recompile using --with-gsl.\n");
  return -1;
#endif /* HAVE_LIBGSL */

#ifdef HAVE_LIBGSL
#ifdef HAVE_OPENMP
  nThreads = omp_get_num_procs();
  omp_set_num_threads(nThreads);
  fprintf(stdout, "We will be using %i threads.\n", nThreads);
#endif

  iter    = 0;
  status  = 0;
  maxiter = 100;
  tol     = 1e-10;

  p = 2;                  /* Number of parameters to fit. ka and kd.  */
  n = params->nFitPoints; /* Number of points in the reduced dataset  */

  if (params->D <= 0)
    {
      fprintf(stderr, "Fitting of D is not implemented yet. It must be provided on the command line.\n");
      return -1;
    }

  /* time[1]-time[0] is read below, so at least two samples are required. */
  if (nData < 2)
    {
      fprintf(stderr, "Need at least two data points to fit the ACF.\n");
      return -1;
    }

  snew(dumpdata, nData);
  snew(GD,1);

  GD->n = n;
  GD->y = ct;
  GD->ctTheory=NULL;
  snew(GD->ctTheory, nData);
  GD->LinLog=NULL;
  snew(GD->LinLog, n);
  GD->time = time;
  GD->ka = 0;
  GD->kd = 0;
  GD->tDelta = time[1]-time[0];
  GD->nData = nData;
  GD->params = params;
  snew(GD->logtime,params->nFitPoints);
  snew(GD->doubleLogTime,params->nFitPoints);

  /* Build the reduced set: logtime[i] is the index into the data returned
   * by getLogIndex, doubleLogTime[i] the same index scaled by the time step. */
  for (i=0; i<params->nFitPoints; i++)
    {
      GD->doubleLogTime[i] = (double)(getLogIndex(i, params));
      GD->logtime[i] = (int)(GD->doubleLogTime[i]);
      GD->doubleLogTime[i]*=GD->tDelta;

      if (GD->logtime[i] >= nData)
	{
	  fprintf(stderr, "Ayay. It seems we're indexing out of bounds.\n");
	  /* Truncate the reduced set just before the offending index.  The
	   * original relied on the loop condition to stop here; the break
	   * makes that explicit. */
	  params->nFitPoints = i;
	  break;
	}
    }

  /* Keep the point count in the fit data in step with a possibly truncated
   * params->nFitPoints (it was copied before the truncation above).
   * NOTE(review): assumes GD->n is the reduced-set size used by the
   * residual function - confirm against gemFunc_residual2. */
  GD->n = params->nFitPoints;

  fitFunc.f = &gemFunc_residual2;
  fitFunc.n = 2;
  fitFunc.params = (void*)GD;

  x  = gsl_vector_alloc (fitFunc.n);
  dx = gsl_vector_alloc (fitFunc.n);
  gsl_vector_set (x,  0, 25);
  gsl_vector_set (x,  1, 0.5);
  gsl_vector_set (dx, 0, 0.1);
  gsl_vector_set (dx, 1, 0.01);


  s = gsl_multimin_fminimizer_alloc (T, fitFunc.n);
  gsl_multimin_fminimizer_set (s, &fitFunc, x, dx);
  gsl_vector_free (x);
  gsl_vector_free (dx);

  do  {
    iter++;
    status = gsl_multimin_fminimizer_iterate (s);

    if (status != 0)
      gmx_fatal(FARGS,"Something went wrong in the iteration in minimizer %s:\n \"%s\"\n",
		gsl_multimin_fminimizer_name(s), gsl_strerror(status));

    d2     = gsl_multimin_fminimizer_minimum(s);
    size   = gsl_multimin_fminimizer_size(s);
    params->ka = gsl_vector_get (s->x, 0);
    params->kd = gsl_vector_get (s->x, 1);

    /* Only reached with a non-zero status if gmx_fatal above returns. */
    if (status)
      {
	fprintf(stderr, "%s\n", gsl_strerror(status));
	break;
      }

    status = gsl_multimin_test_size(size,tol);

    if (status == GSL_SUCCESS) {
      fprintf(stdout, "Converged to minimum at\n");
    }

    printf ("iter %5d: ka = %2.5f  kd = %2.5f  f() = %7.3f  size = %.3f  chi2 = %2.5f\n",
	    iter,
	    params->ka,
	    params->kd,
	    s->fval, size, d2);

    /* iter%1 is always 0, i.e. the curve is dumped on every iteration;
     * raise the modulus to dump less often. */
    if (iter%1 == 0)
      {
	eq10v2(GD->ctTheory, time, nData, params->ka, params->kd, params);
	/* fixGemACF(GD->ctTheory, nFitPoints); */
	sprintf(dumpname, "Iter_%i.xvg", iter);
	for(i=0; i<GD->nData; i++)
	  {
	    dumpdata[i] = (real)(GD->ctTheory[i]);
	    if (!gmx_isfinite(dumpdata[i]))
	      {
		gmx_fatal(FARGS, "Non-finite value in acf.");
	      }
	  }
	dumpN(dumpdata, GD->nData, dumpname);
      }
  }
  while ((status == GSL_CONTINUE) && (iter < maxiter));

  /* Calculate the theoretical ACF from the parameters one last time. */
  eq10v2(GD->ctTheory, time, nData, params->ka, params->kd, params);
  *ctFit = GD->ctTheory;

  /* Release everything allocated here except ctTheory, which has just been
   * handed out through *ctFit.  GD->y, GD->time and GD->params are borrowed
   * from the arguments and must not be freed. */
  sfree(dumpdata);
  sfree(GD->LinLog);
  sfree(GD->logtime);
  sfree(GD->doubleLogTime);
  sfree(GD);
  gsl_multimin_fminimizer_free (s);


  return d2;

#endif /* HAVE_LIBGSL */
}
// Reports the number of processors as given by omp_get_num_procs(),
// converted to unsigned int.
unsigned int YsThreadController::GetNumCPU(void)
{
	const int nProcessors = omp_get_num_procs();
	return static_cast<unsigned int>(nProcessors);
}
Ejemplo n.º 4
0
/**
 * Initialize the global `darktable` state.
 *
 * Parses the command line, sets up locale and XDG_DATA_DIRS, then brings up
 * the subsystems in dependency order: directories, configuration, database,
 * signals, caches, (optionally) the GUI, views, modules and finally lua.
 *
 * If the database is already locked by another instance, image paths given
 * on the command line are forwarded to that instance over D-Bus and the
 * function returns 1 without initializing anything further.
 *
 * @param argc      argument count as passed to main()
 * @param argv      argument vector as passed to main()
 * @param init_gui  non-zero to initialize the GTK user interface as well
 * @return 0 on success; non-zero when startup must not continue (missing
 *         SSE3 at runtime, --version, database cannot be opened or is locked
 *         by another instance, GUI init failure), or whatever usage()
 *         returns for --help / -h / bad arguments.
 */
int dt_init(int argc, char *argv[], const int init_gui)
{
#ifndef __WIN32__
  if(getuid() == 0 || geteuid() == 0)
    printf("WARNING: either your user id or the effective user id are 0. are you running darktable as root?\n");
#endif

  // make everything go a lot faster.
  _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
#if !defined __APPLE__ && !defined __WIN32__
  _dt_sigsegv_old_handler = signal(SIGSEGV,&_dt_sigsegv_handler);
#endif

#ifndef __GNUC_PREREQ
  // on OSX, gcc-4.6 and clang chokes if this is not here.
  #if defined __GNUC__ && defined __GNUC_MINOR__
  # define __GNUC_PREREQ(maj, min) \
  ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
  #else
  # define __GNUC_PREREQ(maj, min) 0
  #endif
#endif
#ifndef __has_builtin
// http://clang.llvm.org/docs/LanguageExtensions.html#feature-checking-macros
  #define __has_builtin(x) false
#endif

#ifndef __SSE3__
  #error "Unfortunately we depend on SSE3 instructions at this time."
  #error "Please contribute a backport patch (or buy a newer processor)."
#else
  #if (__GNUC_PREREQ(4,8) || __has_builtin(__builtin_cpu_supports))
  //FIXME: check will work only in GCC 4.8+ !!! implement manual cpuid check !!!
  //NOTE: _may_i_use_cpu_feature() looks better, but only available in ICC
  if (!__builtin_cpu_supports("sse3"))
  {
    fprintf(stderr, "[dt_init] unfortunately we depend on SSE3 instructions at this time.\n");
    fprintf(stderr, "[dt_init] please contribute a backport patch (or buy a newer processor).\n");
    return 1;
  }
  #else
  //FIXME: no way to check for SSE3 in runtime, implement manual cpuid check !!!
  #endif
#endif

#ifdef M_MMAP_THRESHOLD
  mallopt(M_MMAP_THRESHOLD,128*1024) ; /* use mmap() for large allocations */
#endif

  // we have to have our share dir in XDG_DATA_DIRS,
  // otherwise GTK+ won't find our logo for the about screen (and maybe other things)
  {
    const gchar *xdg_data_dirs = g_getenv("XDG_DATA_DIRS");
    gchar *new_xdg_data_dirs = NULL;
    gboolean set_env = TRUE;
    if(xdg_data_dirs != NULL && *xdg_data_dirs != '\0')
    {
      // check if DARKTABLE_SHAREDIR is already in there
      gboolean found = FALSE;
      gchar **tokens = g_strsplit(xdg_data_dirs, ":", 0);
      // xdg_data_dirs is neither NULL nor empty => tokens != NULL
      for(char **iter = tokens; *iter != NULL; iter++)
        if(!strcmp(DARKTABLE_SHAREDIR, *iter))
        {
          found = TRUE;
          break;
        }
      g_strfreev(tokens);
      if(found)
        set_env = FALSE;
      else
        new_xdg_data_dirs = g_strjoin(":", DARKTABLE_SHAREDIR, xdg_data_dirs, NULL);
    }
    else
      new_xdg_data_dirs = g_strdup(DARKTABLE_SHAREDIR);

    if(set_env)
      g_setenv("XDG_DATA_DIRS", new_xdg_data_dirs, 1);
    g_free(new_xdg_data_dirs);
  }

  setlocale(LC_ALL, "");
  bindtextdomain (GETTEXT_PACKAGE, DARKTABLE_LOCALEDIR);
  bind_textdomain_codeset (GETTEXT_PACKAGE, "UTF-8");
  textdomain (GETTEXT_PACKAGE);


  // init all pointers to 0:
  memset(&darktable, 0, sizeof(darktable_t));

  darktable.progname = argv[0];

  // values taken from the command line; NULL means "not given"
  gchar *dbfilename_from_command = NULL;
  char *datadir_from_command = NULL;
  char *moduledir_from_command = NULL;
  char *tmpdir_from_command = NULL;
  char *configdir_from_command = NULL;
  char *cachedir_from_command = NULL;

  darktable.num_openmp_threads = 1;
#ifdef _OPENMP
  darktable.num_openmp_threads = omp_get_num_procs();
#endif
  darktable.unmuted = 0;
  GSList *images_to_load = NULL, *config_override = NULL;
  for(int k=1; k<argc; k++)
  {
    if(argv[k][0] == '-')
    {
      if(!strcmp(argv[k], "--help") || !strcmp(argv[k], "-h"))
      {
        return usage(argv[0]);
      }
      else if(!strcmp(argv[k], "--version"))
      {
        printf("this is "PACKAGE_STRING"\ncopyright (c) 2009-2014 johannes hanika\n"PACKAGE_BUGREPORT"\n"
#ifdef _OPENMP
        "OpenMP support enabled\n"
#else
        "OpenMP support disabled\n"
#endif
        );
        return 1;
      }
      // For the options below, a missing value at the end of the command
      // line yields argv[argc] == NULL, which is the same as "not given".
      else if(!strcmp(argv[k], "--library"))
      {
        dbfilename_from_command = argv[++k];
      }
      else if(!strcmp(argv[k], "--datadir"))
      {
        datadir_from_command = argv[++k];
      }
      else if(!strcmp(argv[k], "--moduledir"))
      {
        moduledir_from_command = argv[++k];
      }
      else if(!strcmp(argv[k], "--tmpdir"))
      {
        tmpdir_from_command = argv[++k];
      }
      else if(!strcmp(argv[k], "--configdir"))
      {
        configdir_from_command = argv[++k];
      }
      else if(!strcmp(argv[k], "--cachedir"))
      {
        cachedir_from_command = argv[++k];
      }
      else if(!strcmp(argv[k], "--localedir"))
      {
        bindtextdomain (GETTEXT_PACKAGE, argv[++k]);
      }
      else if(argv[k][1] == 'd' && argc > k+1)
      {
        if(!strcmp(argv[k+1], "all"))             darktable.unmuted = 0xffffffff;   // enable all debug information
        else if(!strcmp(argv[k+1], "cache"))      darktable.unmuted |= DT_DEBUG_CACHE;   // enable debugging for lib/film/cache module
        else if(!strcmp(argv[k+1], "control"))    darktable.unmuted |= DT_DEBUG_CONTROL; // enable debugging for scheduler module
        else if(!strcmp(argv[k+1], "dev"))        darktable.unmuted |= DT_DEBUG_DEV; // develop module
        else if(!strcmp(argv[k+1], "fswatch"))    darktable.unmuted |= DT_DEBUG_FSWATCH; // fswatch module
        else if(!strcmp(argv[k+1], "input"))      darktable.unmuted |= DT_DEBUG_INPUT; // input devices
        else if(!strcmp(argv[k+1], "camctl"))     darktable.unmuted |= DT_DEBUG_CAMCTL; // camera control module
        else if(!strcmp(argv[k+1], "perf"))       darktable.unmuted |= DT_DEBUG_PERF; // performance measurements
        else if(!strcmp(argv[k+1], "pwstorage"))  darktable.unmuted |= DT_DEBUG_PWSTORAGE; // pwstorage module
        else if(!strcmp(argv[k+1], "opencl"))     darktable.unmuted |= DT_DEBUG_OPENCL;    // gpu accel via opencl
        else if(!strcmp(argv[k+1], "sql"))        darktable.unmuted |= DT_DEBUG_SQL; // SQLite3 queries
        else if(!strcmp(argv[k+1], "memory"))     darktable.unmuted |= DT_DEBUG_MEMORY; // some stats on mem usage now and then.
        else if(!strcmp(argv[k+1], "lighttable")) darktable.unmuted |= DT_DEBUG_LIGHTTABLE; // lighttable related stuff.
        else if(!strcmp(argv[k+1], "nan"))        darktable.unmuted |= DT_DEBUG_NAN; // check for NANs when processing the pipe.
        else if(!strcmp(argv[k+1], "masks"))      darktable.unmuted |= DT_DEBUG_MASKS; // masks related stuff.
        else if(!strcmp(argv[k+1], "lua"))        darktable.unmuted |= DT_DEBUG_LUA; // lua errors are reported on console
        else return usage(argv[0]);
        k ++;
      }
      else if(argv[k][1] == 't' && argc > k+1)
      {
        darktable.num_openmp_threads = CLAMP(atol(argv[k+1]), 1, 100);
        printf("[dt_init] using %d threads for openmp parallel sections\n", darktable.num_openmp_threads);
        k ++;
      }
      else if(!strcmp(argv[k], "--conf"))
      {
        // "--conf key=value". When --conf is the last argument there is no
        // value to read; the original dereferenced NULL here, now the
        // option is silently ignored in that case.
        const char *arg = argv[++k];
        if(arg != NULL)
        {
          gchar *keyval = g_strdup(arg);
          gchar *c = strchr(keyval, '=');
          // only accept entries with a non-empty value after the first '='
          if(c != NULL && *(c+1) != '\0')
          {
            *c++ = '\0';
            dt_conf_string_entry_t *entry = (dt_conf_string_entry_t*)g_malloc(sizeof(dt_conf_string_entry_t));
            entry->key = g_strdup(keyval);
            entry->value = g_strdup(c);
            config_override = g_slist_append(config_override, entry);
          }
          g_free(keyval);
        }
      }
    }
#ifndef MAC_INTEGRATION
    else
    {
      images_to_load = g_slist_append(images_to_load, argv[k]);
    }
#endif
  }

  if(darktable.unmuted & DT_DEBUG_MEMORY)
  {
    fprintf(stderr, "[memory] at startup\n");
    dt_print_mem_usage();
  }

#ifdef _OPENMP
  omp_set_num_threads(darktable.num_openmp_threads);
#endif
  dt_loc_init_datadir(datadir_from_command);
  dt_loc_init_plugindir(moduledir_from_command);
  if(dt_loc_init_tmp_dir(tmpdir_from_command))
  {
    printf(_("ERROR : invalid temporary directory : %s\n"),darktable.tmpdir);
    return usage(argv[0]);
  }
  dt_loc_init_user_config_dir(configdir_from_command);
  dt_loc_init_user_cache_dir(cachedir_from_command);

#if !GLIB_CHECK_VERSION(2, 35, 0)
  g_type_init();
#endif

  // does not work, as gtk is not inited yet.
  // even if it were, it's a super bad idea to invoke gtk stuff from
  // a signal handler.
  /* check cpu caps */
  // dt_check_cpu(argc,argv);

#ifdef HAVE_GEGL
  {
    // Own scope and name: a second `datadir` array is declared further down
    // in the function scope, which made this a redefinition error whenever
    // HAVE_GEGL was defined.
    char geglpath[PATH_MAX];
    char gegl_datadir[PATH_MAX];
    dt_loc_get_datadir(gegl_datadir, sizeof(gegl_datadir));
    snprintf(geglpath, sizeof(geglpath), "%s/gegl:/usr/lib/gegl-0.0", gegl_datadir);
    (void)setenv("GEGL_PATH", geglpath, 1);
    gegl_init(&argc, &argv);
  }
#endif
#ifdef USE_LUA
  dt_lua_init_early(NULL);
#endif

  // thread-safe init:
  dt_exif_init();
  // note: despite its name, `datadir` holds the user CONFIG directory; it is
  // reused below for the keyboardrc files.
  char datadir[PATH_MAX];
  dt_loc_get_user_config_dir (datadir, sizeof(datadir));
  char filename[PATH_MAX];
  snprintf(filename, sizeof(filename), "%s/darktablerc", datadir);

  // initialize the config backend. this needs to be done first...
  darktable.conf = (dt_conf_t *)calloc(1, sizeof(dt_conf_t));
  dt_conf_init(darktable.conf, filename, config_override);
  g_slist_free_full(config_override, g_free);

  // set the interface language
  const gchar* lang = dt_conf_get_string("ui_last/gui_language"); // we may not g_free 'lang' since it is owned by setlocale afterwards
  if(lang != NULL && lang[0] != '\0')
  {
    if(setlocale(LC_ALL, lang) != NULL)
      gtk_disable_setlocale();
  }

  // initialize the database
  darktable.db = dt_database_init(dbfilename_from_command);
  if(darktable.db == NULL)
  {
    printf("ERROR : cannot open database\n");
    return 1;
  }
  else if(!dt_database_get_lock_acquired(darktable.db))
  {
    // another instance holds the lock: send the images to it via dbus
    if(images_to_load)
    {
      // get a connection! (NULL if the session bus is unreachable)
      GDBusConnection *connection = g_bus_get_sync(G_BUS_TYPE_SESSION,NULL, NULL);

      // The list cursor is advanced by the for statement, so skipping an
      // entry with `continue` cannot stall the loop (the original
      // while-loop never advanced past a path that could not be resolved).
      for(GSList *p = images_to_load; connection != NULL && p != NULL; p = g_slist_next(p))
      {
        // make the filename absolute ...
        gchar *abs_path = dt_make_path_absolute((gchar*)p->data);
        if(abs_path == NULL) continue;
        // ... and send it to the running instance of darktable
        GVariant *reply = g_dbus_connection_call_sync(connection,
                                                      "org.darktable.service",
                                                      "/darktable",
                                                      "org.darktable.service.Remote",
                                                      "Open",
                                                      g_variant_new ("(s)", abs_path),
                                                      NULL,
                                                      G_DBUS_CALL_FLAGS_NONE,
                                                      -1,
                                                      NULL,
                                                      NULL);
        // the reply is owned by us; it is NULL when the call failed
        if(reply != NULL) g_variant_unref(reply);
        g_free(abs_path);
      }

      g_slist_free(images_to_load);
      if(connection != NULL) g_object_unref(connection);
    }

    return 1;
  }

  // Initialize the signal system
  darktable.signals = dt_control_signal_init();

  // Make sure that the database and xmp files are in sync before starting the fswatch.
  // We need conf and db to be up and running for that which is the case here.
  // FIXME: is this also useful in non-gui mode?
  GList *changed_xmp_files = NULL;
  if(init_gui && dt_conf_get_bool("run_crawler_on_start"))
  {
    changed_xmp_files = dt_control_crawler_run();
  }

  // Initialize the filesystem watcher
  darktable.fswatch=dt_fswatch_new();

#ifdef HAVE_GPHOTO2
  // Initialize the camera control
  darktable.camctl=dt_camctl_new();
#endif

  // get max lighttable thumbnail size:
  darktable.thumbnail_width  = CLAMPS(dt_conf_get_int("plugins/lighttable/thumbnail_width"),  200, 3000);
  darktable.thumbnail_height = CLAMPS(dt_conf_get_int("plugins/lighttable/thumbnail_height"), 200, 3000);
  // and make sure it can be mip-mapped all the way from mip4 to mip0
  // (round both dimensions down to a multiple of 16)
  darktable.thumbnail_width  /= 16;
  darktable.thumbnail_width  *= 16;
  darktable.thumbnail_height /= 16;
  darktable.thumbnail_height *= 16;

  // Initialize the password storage engine
  darktable.pwstorage=dt_pwstorage_new();

  // FIXME: move there into dt_database_t
  dt_pthread_mutex_init(&(darktable.db_insert), NULL);
  dt_pthread_mutex_init(&(darktable.plugin_threadsafe), NULL);
  dt_pthread_mutex_init(&(darktable.capabilities_threadsafe), NULL);
  darktable.control = (dt_control_t *)calloc(1, sizeof(dt_control_t));
  if(init_gui)
  {
    dt_control_init(darktable.control);
  }
  else
  {
    if(dbfilename_from_command && !strcmp(dbfilename_from_command, ":memory:"))
      dt_gui_presets_init(); // init preset db schema.
    darktable.control->running = 0;
    darktable.control->accelerators = NULL;
    dt_pthread_mutex_init(&darktable.control->run_mutex, NULL);
  }

  // initialize collection query
  darktable.collection_listeners = NULL;
  darktable.collection = dt_collection_new(NULL);

  /* initialize selection */
  darktable.selection = dt_selection_new();

  /* capabilities set to NULL */
  darktable.capabilities = NULL;

#ifdef HAVE_GRAPHICSMAGICK
  /* GraphicsMagick init */
  InitializeMagick(darktable.progname);
#endif

  darktable.opencl = (dt_opencl_t *)calloc(1, sizeof(dt_opencl_t));
#ifdef HAVE_OPENCL
  dt_opencl_init(darktable.opencl, argc, argv);
#endif

  darktable.blendop = (dt_blendop_t *)calloc(1, sizeof(dt_blendop_t));
  dt_develop_blend_init(darktable.blendop);

  darktable.points = (dt_points_t *)calloc(1, sizeof(dt_points_t));
  dt_points_init(darktable.points, dt_get_num_threads());

  // must come before mipmap_cache, because that one will need to access
  // image dimensions stored in here:
  darktable.image_cache = (dt_image_cache_t *)calloc(1, sizeof(dt_image_cache_t));
  dt_image_cache_init(darktable.image_cache);

  darktable.mipmap_cache = (dt_mipmap_cache_t *)calloc(1, sizeof(dt_mipmap_cache_t));
  dt_mipmap_cache_init(darktable.mipmap_cache);

  // The GUI must be initialized before the views, because the init()
  // functions of the views depend on darktable.control->accels_* to register
  // their keyboard accelerators

  if(init_gui)
  {
    darktable.gui = (dt_gui_gtk_t *)calloc(1, sizeof(dt_gui_gtk_t));
    if(dt_gui_gtk_init(darktable.gui, argc, argv)) return 1;
    dt_bauhaus_init();
  }
  else darktable.gui = NULL;

  darktable.view_manager = (dt_view_manager_t *)calloc(1, sizeof(dt_view_manager_t));
  dt_view_manager_init(darktable.view_manager);

  // load the darkroom mode plugins once:
  dt_iop_load_modules_so();

  if(init_gui)
  {
    darktable.lib = (dt_lib_t *)calloc(1, sizeof(dt_lib_t));
    dt_lib_init(darktable.lib);

    dt_control_load_config(darktable.control);
  }
  darktable.imageio = (dt_imageio_t *)calloc(1, sizeof(dt_imageio_t));
  dt_imageio_init(darktable.imageio);

  if(init_gui)
  {
    // Loading the keybindings
    char keyfile[PATH_MAX];

    // First dump the default keymapping
    snprintf(keyfile, sizeof(keyfile), "%s/keyboardrc_default", datadir);
    gtk_accel_map_save(keyfile);

    // Removing extraneous semi-colons from the default keymap
    strip_semicolons_from_keymap(keyfile);

    // Then load any modified keys if available
    snprintf(keyfile, sizeof(keyfile), "%s/keyboardrc", datadir);
    if(g_file_test(keyfile, G_FILE_TEST_EXISTS))
      gtk_accel_map_load(keyfile);
    else
      gtk_accel_map_save(keyfile); // Save the default keymap if none is present

    // I doubt that connecting to dbus for darktable-cli makes sense
    darktable.dbus = dt_dbus_init();

    // initialize undo struct
    darktable.undo = dt_undo_init();

    // load image(s) specified on cmdline
    int id = 0;
    if(images_to_load)
    {
      // If only one image is listed, attempt to load it in darkroom
      gboolean load_in_dr = (g_slist_next(images_to_load) == NULL);
      GSList *p = images_to_load;

      while (p != NULL)
      {
        // don't put these function calls into MAX(), the macro will evaluate
        // it twice (and happily deadlock, in this particular case)
        int newid = dt_load_from_string((gchar*)p->data, load_in_dr);
        id = MAX(id, newid);
        p = g_slist_next(p);
      }

      if (!load_in_dr || id == 0)
        dt_ctl_switch_mode_to(DT_LIBRARY);

      g_slist_free(images_to_load);
    }
    else
      dt_ctl_switch_mode_to(DT_LIBRARY);
  }

  if(darktable.unmuted & DT_DEBUG_MEMORY)
  {
    fprintf(stderr, "[memory] after successful startup\n");
    dt_print_mem_usage();
  }

  dt_image_local_copy_synch();

  /* init lua last, since it's user made stuff it must be in the real environment */
#ifdef USE_LUA
  dt_lua_init(darktable.lua_state.state,init_gui);
#endif

  // last but not least construct the popup that asks the user about images whose xmp files are newer than the db entry
  if(init_gui && changed_xmp_files)
  {
    dt_control_crawler_show_image_list(changed_xmp_files);
  }

  return 0;
}
Ejemplo n.º 5
0
int main(int argc, char* argv[]) {
  int t = (argc > 1) ? atoi(argv[1]) : omp_get_num_procs();
  ImageInverter ii("../assets/pics/Car-colors.jpg", 1022, 1024);
  ii.run(t);
}
Ejemplo n.º 6
0
/******************************************************************************/
/*
  HELLO_OPENMP: a "Hello, World" program for OpenMP.

  Prints the processor and thread counts reported by the OpenMP runtime,
  greets once from outside the parallel region, then lets every thread of a
  parallel region print its own thread number, and finally reports the
  elapsed wall clock time.

  Licensing: distributed under the GNU LGPL license.
  Modified:  23 June 2010
  Author:    John Burkardt
*/
int main ( int argc, char *argv[] )
{
    /* Banner and runtime information. */
    printf ( "\n" "HELLO_OPENMP\n" "  C/OpenMP version\n" );
    printf ( "\n" );
    printf ( "  Number of processors available = %d\n", omp_get_num_procs ( ) );
    printf ( "  Number of threads =              %d\n", omp_get_max_threads ( ) );

    /* Start the wall clock. */
    const double t_start = omp_get_wtime ( );

    printf ( "\n" "  OUTSIDE the parallel region.\n" "\n" );

    /* Thread number as seen from the sequential part. */
    const int outer_id = omp_get_thread_num ( );
    printf ( "  HELLO from process %d\n", outer_id );

    printf ( "\n" "  Going INSIDE the parallel region:\n" "\n" );

    /* Each thread owns its block-local id, so no private clause is needed. */
    #pragma omp parallel
    {
        const int thread_id = omp_get_thread_num ( );
        printf ("  Hello from process %d\n", thread_id );
    }

    /* Stop the wall clock. */
    const double elapsed = omp_get_wtime ( ) - t_start;

    printf ( "\n" "  Back OUTSIDE the parallel region.\n" );

    /* Terminate. */
    printf ( "\n" "HELLO_OPENMP\n" "  Normal end of execution.\n" );
    printf ( "\n" );
    printf ( "  Elapsed wall clock time = %f\n", elapsed );

    return 0;
}
Ejemplo n.º 7
0
       KernelDepth( "kernelDepth" ) ,
       AdaptiveExponent( "adaptiveExp" , 1 ) ,
       MinIters( "minIters" , 24 ) ,
       FixedIters( "iters" , -1 ) ,
       VoxelDepth( "voxelDepth" , -1 ) ,
#if 1
#ifdef _WIN32
#pragma message( "[WARNING] Setting default min-depth to 5" )
#endif // _WIN32
       MinDepth( "minDepth" , 5 ) ,
#else
       MinDepth( "minDepth" , 0 ) ,
#endif
       MaxSolveDepth( "maxSolveDepth" ) ,
       BoundaryType( "boundary" , 1 ) ,
       Threads( "threads" , omp_get_num_procs() );

// Floating-point command-line parameters. Each object is constructed with
// the option name as it appears on the command line and a second argument
// that is presumably its default value -- confirm against the cmdLineFloat
// declaration.
cmdLineFloat
SamplesPerNode( "samplesPerNode" , 1.f ) ,
                Scale( "scale" , 1.1f ) ,
                SolverAccuracy( "accuracy" , float(1e-3) ) ,
                PointWeight( "pointWeight" , 4.f );


cmdLineReadable* params[] =
{
    &In , &Depth , &Out , &XForm ,
    &SolverDivide , &IsoDivide , &Scale , &Verbose , &SolverAccuracy , &NoComments ,
    &KernelDepth , &SamplesPerNode , &Confidence , &NonManifold , &PolygonMesh , &ASCII , &ShowResidual , &MinIters , &FixedIters , &VoxelDepth ,
    &PointWeight , &VoxelGrid , &Threads , &MinDepth , &MaxSolveDepth ,
    &AdaptiveExponent , &BoundaryType ,
Ejemplo n.º 8
0
// Main
int main(int argc, char* argv[])
{
	// Disabling Bzz OpenMP
	//bzzOpenMP = 0;

	// Dictionary setup
	OpenSMOKE_KPP_Dictionary dictionary;
	dictionary.ParseFile("Input.inp");

	// Read input file
	OpenSMOKE_KPP_DataManager data(dictionary);

	// OpenMP
    //omp_set_dynamic(false);
	//omp_set_num_threads(12);


    cout << "Number of user defined threads: " << data.nThreads()           << endl;
	cout << "Number of current threads:      " << omp_get_num_threads() 	<< endl;
	cout << "Master thread id:               " << omp_get_thread_num() 	<< endl;
	cout << "Number of processes:            " << omp_get_num_procs() 	<< endl;
	cout << "Max number of threads:          " << omp_get_max_threads() 	<< endl;



	// Initialize LIS
	lis_initialize(&argc, &argv);

	// Gas mixture setup
    	cout << "Start" << endl; //getchar();
	OpenSMOKE_ReactingGas* mix;
	if (data.iSaveKineticConstants() == false)
	{
		mix = new OpenSMOKE_ReactingGas[data.nThreads()];
		for (int k=0;k<data.nThreads();k++)
			mix[k].SetupBinary(dictionary.kinetics());
	}
	else
	{
		mix = new OpenSMOKE_ReactingGas[1];
		mix[0].SetupBinary(dictionary.kinetics());
	}	
    	cout << "Done: Mixture" << endl; //getchar();
	
	// Reactor network setup
	OpenSMOKE_KPP_ReactorNetwork network(*mix, data, fLog, fWarning);
        cout << "Done: Network" << endl; //getchar();
	network.ReadFirstGuess();
        cout << "Done: FirstGuess" << endl; //getchar();
	network.ReadTopology();
        cout << "Done: Topology" << endl; //getchar();
	network.BuildNetwork();

	// Initial Analysis
	InputOutputAnalysis(*mix, network);
	MassUmbalances(network);
	
	string log_string = data.nameFolderOutput() + "/Log.out";
	fLog.open( log_string.c_str(), ios::out);
	fLog.setf(ios::scientific);

	string warning_string = data.nameFolderOutput() + "/Warning.out";
	fWarning.open( warning_string.c_str(), ios::out);
	fWarning.setf(ios::scientific);



	globalIteration = 1;

	// Without reactions
	if (data.iReactions() == false)
	{
		network.SolveWithoutReactions();
		network.WriteMassFractionMap();
	}
	
	// With reactions
	if (data.iReactions() == true)
	{
		double timeStartTotal = BzzGetCpuTime();
	
	//	SequenceCSTR(network, data);
	//	PredictorCorrector(network, data);
	//	GlobalODE(network, data);
	//	GlobalNewtonMethod(network, data);

		globalODELinearSystemAbsoluteTolerance = 1.e-11;
	 	globalODELinearSystemRelativeTolerance = 1.e-9;
		globalNLSLinearSystemAbsoluteTolerance = 1.e-15;
	 	globalNLSLinearSystemRelativeTolerance = 1.e-12;

		int iFlagSequence;
		int iFlagODE;
		int iFlagNewtonMethod;

		for(int jj=1;jj<=3;jj++)
		{
			iFlagSequence	  = SequenceCSTR(network, data);
			iFlagODE          = GlobalODE(network, data);

			globalODELinearSystemAbsoluteTolerance /= 10.;
			globalODELinearSystemRelativeTolerance /= 10.;
		}

		iFlagNewtonMethod	= GlobalNewtonMethod(network, data);

		for(int jj=1;jj<=2;jj++)
		{			
			globalODELinearSystemAbsoluteTolerance /= 10.;
			globalODELinearSystemRelativeTolerance /= 10.;
		
			iFlagSequence	  	= SequenceCSTR(network, data);
			iFlagODE          	= GlobalODE(network, data);	
			iFlagNewtonMethod	= GlobalNewtonMethod(network, data);
		}

		double timeEndTotal = BzzGetCpuTime();

		// Final Analysis
		InputOutputAnalysis(*mix, network);
		MassUmbalances(network);

		cout << "Total CPU Time: " << timeEndTotal - timeStartTotal << endl;
	}

	
}
Ejemplo n.º 9
0
/*
 * Set_XC_Grid
 *
 * Fills the global Vxc_Grid on the grid points held by this process from
 * Density_Grid (plus PCCDensity_Grid when PCC_switch==1).
 *
 *   XC_switch   selects the functional handled in the main switch below:
 *                 1  LDA      (XC_Ceperly_Alder)
 *                 2  LSDA-CA  (XC_CA_LSDA)
 *                 3  LSDA-PW  (XC_PW91C + XC_EX)
 *                 4  GGA-PBE  (XC_PBE)
 *               Any other value leaves Vxc_Grid untouched by the main loop
 *               (the switch has no default case).
 *   XC_P_switch selects the quantity written to Vxc_Grid (see table below).
 *
 * For XC_switch==4 the routine works in three passes:
 *   1. density gradients dDen_Grid by central differences over neighbouring
 *      grid points,
 *   2. XC_PBE per grid point, storing the gradient derivatives in dEXC_dGD,
 *   3. (only if XC_P_switch!=0) a second central-difference pass over
 *      dEXC_dGD whose result is subtracted from / added to Vxc_Grid.
 *
 * If SpinP_switch==3 and XC_P_switch!=2 the two spin channels are finally
 * converted into the four components Vxc_Grid[0..3] using the angles stored
 * in Density_Grid[2] and Density_Grid[3].
 *
 * Returns nothing; all results are written to the global Vxc_Grid.
 */
void Set_XC_Grid(int XC_P_switch, int XC_switch)
{
  /****************************************************
        XC_P_switch:
            0  \epsilon_XC (XC energy density)  
            1  \mu_XC      (XC potential)  
            2  \epsilon_XC - \mu_XC
  ****************************************************/

  int MN,MN1,MN2,i,j,k,ri,ri1,ri2;
  int i1,i2,j1,j2,k1,k2,n,nmax;
  double den_min=1.0e-14; 
  double Ec_unif[1],Vc_unif[2],Exc[2],Vxc[2];
  double Ex_unif[1],Vx_unif[2],tot_den;
  double ED[2],GDENS[3][2];
  double DEXDD[2],DECDD[2];
  double DEXDGD[3][2],DECDGD[3][2];
  /* NOTE(review): these two pointers are only assigned when XC_switch==4
     (dEXC_dGD additionally requires XC_P_switch!=0); they are only
     dereferenced under the same conditions further down. */
  double ***dEXC_dGD,***dDen_Grid;
  double up_x_a,up_x_b,up_x_c;
  double up_y_a,up_y_b,up_y_c;
  double up_z_a,up_z_b,up_z_c;
  double dn_x_a,dn_x_b,dn_x_c;
  double dn_y_a,dn_y_b,dn_y_c;
  double dn_z_a,dn_z_b,dn_z_c;
  double up_a,up_b,up_c;
  double dn_a,dn_b,dn_c;

  double tmp0,tmp1;
  double cot,sit,sip,cop,phi,theta;
  double detA,igtv[4][4];
  int numprocs,myid;

  /* for OpenMP */
  /* NOTE(review): Nprocs is assigned in every parallel region but never read
     in this function. */
  int OMPID,Nthrds,Nprocs;

  /****************************************************
   when GGA, allocation

   double dEXC_dGD[2][3][My_NumGrid1]
   double dDen_Grid[2][3][My_NumGrid1]
  ****************************************************/

  /* MPI */
  MPI_Comm_size(mpi_comm_level1,&numprocs);
  MPI_Comm_rank(mpi_comm_level1,&myid);

  if (XC_switch==4){

    /* dDen_Grid[spin][axis][grid point], zero-initialised */
    dDen_Grid = (double***)malloc(sizeof(double**)*2); 
    for (k=0; k<=1; k++){
      dDen_Grid[k] = (double**)malloc(sizeof(double*)*3); 
      for (i=0; i<3; i++){
        dDen_Grid[k][i] = (double*)malloc(sizeof(double)*My_NumGrid1); 
        for (j=0; j<My_NumGrid1; j++) dDen_Grid[k][i][j] = 0.0;
      }
    }

    /* dEXC_dGD is only needed when a potential is requested */
    if (XC_P_switch!=0){
      dEXC_dGD = (double***)malloc(sizeof(double**)*2); 
      for (k=0; k<=1; k++){
        dEXC_dGD[k] = (double**)malloc(sizeof(double*)*3); 
        for (i=0; i<3; i++){
          dEXC_dGD[k][i] = (double*)malloc(sizeof(double)*My_NumGrid1); 
          for (j=0; j<My_NumGrid1; j++) dEXC_dGD[k][i][j] = 0.0;
        }
      }
    }

    /* PrintMemory */
    /* NOTE(review): the dEXC_dGD entry is reported even when XC_P_switch==0,
       in which case the array was not allocated above. */
    PrintMemory("Set_XC_Grid: dDen_Grid", sizeof(double)*6*My_NumGrid1, NULL);
    PrintMemory("Set_XC_Grid: dEXC_dGD",  sizeof(double)*6*My_NumGrid1, NULL);

    /****************************************************
     calculate dDen_Grid
    ****************************************************/
 
    /* determinant of the 3x3 matrix gtv[1..3][1..3] */
    detA =   gtv[1][1]*gtv[2][2]*gtv[3][3]
           + gtv[1][2]*gtv[2][3]*gtv[3][1]
           + gtv[1][3]*gtv[2][1]*gtv[3][2]
           - gtv[1][3]*gtv[2][2]*gtv[3][1]
           - gtv[1][2]*gtv[2][1]*gtv[3][3]
           - gtv[1][1]*gtv[2][3]*gtv[3][2];     

    /* igtv = inverse of gtv by the cofactor formula (only indices 1..3 are
       used; row/column 0 of igtv stays unset) */
    igtv[1][1] =  (gtv[2][2]*gtv[3][3] - gtv[2][3]*gtv[3][2])/detA;
    igtv[2][1] = -(gtv[2][1]*gtv[3][3] - gtv[2][3]*gtv[3][1])/detA;
    igtv[3][1] =  (gtv[2][1]*gtv[3][2] - gtv[2][2]*gtv[3][1])/detA; 

    igtv[1][2] = -(gtv[1][2]*gtv[3][3] - gtv[1][3]*gtv[3][2])/detA;
    igtv[2][2] =  (gtv[1][1]*gtv[3][3] - gtv[1][3]*gtv[3][1])/detA;
    igtv[3][2] = -(gtv[1][1]*gtv[3][2] - gtv[1][2]*gtv[3][1])/detA; 

    igtv[1][3] =  (gtv[1][2]*gtv[2][3] - gtv[1][3]*gtv[2][2])/detA;
    igtv[2][3] = -(gtv[1][1]*gtv[2][3] - gtv[1][3]*gtv[2][1])/detA;
    igtv[3][3] =  (gtv[1][1]*gtv[2][2] - gtv[1][2]*gtv[2][1])/detA; 

#pragma omp parallel shared(igtv,dDen_Grid,PCCDensity_Grid,PCC_switch,Density_Grid,den_min,My_Cell0,My_Cell1,Ngrid3,Ngrid2,Num_Cells0) private(OMPID,Nthrds,Nprocs,nmax,n,i,j,k,ri,ri1,ri2,i1,i2,j1,j2,k1,k2,MN,MN1,MN2,up_a,dn_a,up_b,dn_b,up_c,dn_c)
    {

      OMPID = omp_get_thread_num();
      Nthrds = omp_get_num_threads();
      Nprocs = omp_get_num_procs();
      nmax = Num_Cells0*Ngrid2*Ngrid3; 

      /* each thread handles the contiguous index range
         [OMPID*nmax/Nthrds, (OMPID+1)*nmax/Nthrds) */
      for (n=OMPID*nmax/Nthrds; n<(OMPID+1)*nmax/Nthrds; n++){

	/* decode the flat index n into (i,j,k) with k running fastest */
	i = n/(Ngrid2*Ngrid3);
	j = (n-i*Ngrid2*Ngrid3)/Ngrid3;
	k = n - i*Ngrid2*Ngrid3 - j*Ngrid3; 
	ri = My_Cell1[i];

	/* find ri1, ri2, i1, and i2 */
	/* ri1/ri2 are the -1/+1 neighbours of ri along the first axis with
	   wrap-around at 0 and Ngrid1-1; My_Cell0 maps them back to local
	   indices */

	if (ri==0){
	  ri1 = Ngrid1 - 1;
	  ri2 = 1;        
	  i1 = My_Cell0[ri1];
	  i2 = My_Cell0[ri2];
	}
	else if (ri==(Ngrid1-1)){
	  ri1 = Ngrid1 - 2;
	  ri2 = 0;
	  i1 = My_Cell0[ri1];
	  i2 = My_Cell0[ri2];
	}      
	else{
	  ri1 = ri - 1;
	  ri2 = ri + 1;
	  i1 = My_Cell0[ri1];
	  i2 = My_Cell0[ri2];
	}

	/* because we have +-1 buffer cells. */
	/* points for which either neighbour index is -1 are skipped and keep
	   the zero written at allocation */

	if (i1!=-1 && i2!=-1){

	  /* find j1 and j2 */

	  if (j==0){
	    j1 = Ngrid2 - 1;
	    j2 = 1;
	  }
	  else if (j==(Ngrid2-1)){
	    j1 = Ngrid2 - 2;
	    j2 = 0;
	  }
	  else{
	    j1 = j - 1;
	    j2 = j + 1;
	  }

	  /* find k1 and k2 */

	  if (k==0){
	    k1 = Ngrid3 - 1;
	    k2 = 1;
	  }
	  else if (k==(Ngrid3-1)){
	    k1 = Ngrid3 - 2;
	    k2 = 0;
	  }
	  else{
	    k1 = k - 1;
	    k2 = k + 1;
	  }  

	  /* set MN */

	  MN = i*Ngrid2*Ngrid3 + j*Ngrid3 + k; 

	  /* set dDen_Grid */

	  if ( den_min<(Density_Grid[0][MN]+Density_Grid[1][MN]) ){

	    /* a-axis */
	    /* up_a/dn_a: difference of the (optionally PCC-augmented) spin
	       densities between the +1 and -1 neighbours */
	    /* NOTE(review): up_*/dn_* are only assigned for PCC_switch 0 or 1;
	       any other value would leave them unset. */

	    MN1 = i1*Ngrid2*Ngrid3 + j*Ngrid3 + k;
	    MN2 = i2*Ngrid2*Ngrid3 + j*Ngrid3 + k;

	    if (PCC_switch==0) {
	      up_a = Density_Grid[0][MN2] - Density_Grid[0][MN1];
	      dn_a = Density_Grid[1][MN2] - Density_Grid[1][MN1];
	    }
	    else if (PCC_switch==1) {
	      up_a = Density_Grid[0][MN2] + PCCDensity_Grid[MN2]
	           - Density_Grid[0][MN1] - PCCDensity_Grid[MN1];
	      dn_a = Density_Grid[1][MN2] + PCCDensity_Grid[MN2]
	           - Density_Grid[1][MN1] - PCCDensity_Grid[MN1];
	    }

	    /* b-axis */

	    MN1 = i*Ngrid2*Ngrid3 + j1*Ngrid3 + k; 
	    MN2 = i*Ngrid2*Ngrid3 + j2*Ngrid3 + k; 

	    if (PCC_switch==0) {
	      up_b = Density_Grid[0][MN2] - Density_Grid[0][MN1];
	      dn_b = Density_Grid[1][MN2] - Density_Grid[1][MN1];
	    }
	    else if (PCC_switch==1) {
	      up_b = Density_Grid[0][MN2] + PCCDensity_Grid[MN2]
	           - Density_Grid[0][MN1] - PCCDensity_Grid[MN1];
	      dn_b = Density_Grid[1][MN2] + PCCDensity_Grid[MN2]
	           - Density_Grid[1][MN1] - PCCDensity_Grid[MN1];
	    }

	    /* c-axis */

	    MN1 = i*Ngrid2*Ngrid3 + j*Ngrid3 + k1; 
	    MN2 = i*Ngrid2*Ngrid3 + j*Ngrid3 + k2; 

	    if (PCC_switch==0) {
	      up_c = Density_Grid[0][MN2] - Density_Grid[0][MN1];
	      dn_c = Density_Grid[1][MN2] - Density_Grid[1][MN1];
	    }
	    else if (PCC_switch==1) {
	      up_c = Density_Grid[0][MN2] + PCCDensity_Grid[MN2]
	           - Density_Grid[0][MN1] - PCCDensity_Grid[MN1];
	      dn_c = Density_Grid[1][MN2] + PCCDensity_Grid[MN2]
	           - Density_Grid[1][MN1] - PCCDensity_Grid[MN1];
	    }

	    /* the factor 0.5 halves the (+1) - (-1) difference (central
	       difference); igtv combines the a/b/c differences into the three
	       stored gradient components */

	    /* up */
	    dDen_Grid[0][0][MN] = 0.5*(igtv[1][1]*up_a + igtv[1][2]*up_b + igtv[1][3]*up_c);
	    dDen_Grid[0][1][MN] = 0.5*(igtv[2][1]*up_a + igtv[2][2]*up_b + igtv[2][3]*up_c);
	    dDen_Grid[0][2][MN] = 0.5*(igtv[3][1]*up_a + igtv[3][2]*up_b + igtv[3][3]*up_c);

	    /* down */
	    dDen_Grid[1][0][MN] = 0.5*(igtv[1][1]*dn_a + igtv[1][2]*dn_b + igtv[1][3]*dn_c);
	    dDen_Grid[1][1][MN] = 0.5*(igtv[2][1]*dn_a + igtv[2][2]*dn_b + igtv[2][3]*dn_c);
	    dDen_Grid[1][2][MN] = 0.5*(igtv[3][1]*dn_a + igtv[3][2]*dn_b + igtv[3][3]*dn_c);
	  }

	  /* total density not above den_min: gradient is set to zero */
	  else{
	    dDen_Grid[0][0][MN] = 0.0;
	    dDen_Grid[0][1][MN] = 0.0;
	    dDen_Grid[0][2][MN] = 0.0;
	    dDen_Grid[1][0][MN] = 0.0;
	    dDen_Grid[1][1][MN] = 0.0;
	    dDen_Grid[1][2][MN] = 0.0;
	  }

	} /* if (i1!=-1 && i2!=-1) */
      } /* n */

#pragma omp flush(dDen_Grid)

    } /* #pragma omp parallel */
  } /* if (XC_switch==4) */ 

  /****************************************************
   loop MN
  ****************************************************/

  /* main pass: every grid point MN is independent; each thread takes a
     contiguous slice of [0, My_NumGrid1) */

#pragma omp parallel shared(dDen_Grid,dEXC_dGD,den_min,Vxc_Grid,My_NumGrid1,XC_P_switch,XC_switch,Density_Grid,PCC_switch,PCCDensity_Grid) private(OMPID,Nthrds,Nprocs,MN,tot_den,tmp0,ED,Exc,Ec_unif,Vc_unif,Vxc,Ex_unif,Vx_unif,GDENS,DEXDD,DECDD,DEXDGD,DECDGD)
  {

    OMPID = omp_get_thread_num();
    Nthrds = omp_get_num_threads();
    Nprocs = omp_get_num_procs();

    for (MN=OMPID*My_NumGrid1/Nthrds; MN<(OMPID+1)*My_NumGrid1/Nthrds; MN++){

      switch(XC_switch){
        
	/******************************************************************
         LDA (Ceperly-Alder)

         constructed by Ceperly and Alder,
         ref.
         D. M. Ceperley, Phys. Rev. B18, 3126 (1978)
         D. M. Ceperley and B. J. Alder, Phys. Rev. Lett., 45, 566 (1980) 

         and parametrized by Perdew and Zunger.
         ref.
         J. Perdew and A. Zunger, Phys. Rev. B23, 5048 (1981)
	******************************************************************/
        
      case 1:
        
	tot_den = Density_Grid[0][MN] + Density_Grid[1][MN];

	/* partial core correction */
	/* PCCDensity_Grid is added once per spin channel, hence the factor 2
	   on the total density */
	if (PCC_switch==1) {
	  tot_den += PCCDensity_Grid[MN]*2.0;
	}

	/* the same value is stored for both spin channels */
	tmp0 = XC_Ceperly_Alder(tot_den,XC_P_switch);
	Vxc_Grid[0][MN] = tmp0;
	Vxc_Grid[1][MN] = tmp0;
        
	break;

	/******************************************************************
         LSDA-CA (Ceperly-Alder)

         constructed by Ceperly and Alder,
         ref.
         D. M. Ceperley, Phys. Rev. B18, 3126 (1978)
         D. M. Ceperley and B. J. Alder, Phys. Rev. Lett., 45, 566 (1980) 

         and parametrized by Perdew and Zunger.
         ref.
         J. Perdew and A. Zunger, Phys. Rev. B23, 5048 (1981)
	******************************************************************/

      case 2:

	ED[0] = Density_Grid[0][MN];
	ED[1] = Density_Grid[1][MN];

	/* partial core correction */
	if (PCC_switch==1) {
	  ED[0] += PCCDensity_Grid[MN];
	  ED[1] += PCCDensity_Grid[MN];
	}

	/* XC_CA_LSDA receives XC_P_switch and returns one value per spin
	   channel in Exc */
	XC_CA_LSDA(ED[0], ED[1], Exc, XC_P_switch);
	Vxc_Grid[0][MN] = Exc[0];
	Vxc_Grid[1][MN] = Exc[1];

	break;

	/******************************************************************
         LSDA-PW (PW91)
         used as Grad\rho = 0 in their GGA formalism

         ref.
         J.P.Perdew and Yue Wang, Phys. Rev. B45, 13244 (1992) 
	******************************************************************/

      case 3:

	ED[0] = Density_Grid[0][MN];
	ED[1] = Density_Grid[1][MN];

	/* partial core correction */
	if (PCC_switch==1) {
	  ED[0] += PCCDensity_Grid[MN];
	  ED[1] += PCCDensity_Grid[MN];
	}

	/* below the density threshold the result is set to zero, which also
	   avoids the division by (ED[0]+ED[1]) further down */
	if ((ED[0]+ED[1])<den_min){
	  Vxc_Grid[0][MN] = 0.0;
	  Vxc_Grid[1][MN] = 0.0;
	}
	else{
      
	  /* energy density: correlation part Exc[0] plus exchange part Exc[1],
	     where Exc[1] is accumulated from two XC_EX calls (one per spin,
	     each with doubled spin density) and then normalised */
	  if (XC_P_switch==0){

	    XC_PW91C(ED,Ec_unif,Vc_unif);

	    Vxc[0] = Vc_unif[0];
	    Vxc[1] = Vc_unif[1];
	    Exc[0] = Ec_unif[0];

	    XC_EX(1,2.0*ED[0],ED,Ex_unif,Vx_unif);
	    Vxc[0] = Vxc[0] + Vx_unif[0];
	    Exc[1] = 2.0*ED[0]*Ex_unif[0];

	    XC_EX(1,2.0*ED[1],ED,Ex_unif,Vx_unif);
	    Vxc[1] += Vx_unif[0];
	    Exc[1] += 2.0*ED[1]*Ex_unif[0];

	    Exc[1] = 0.5*Exc[1]/(ED[0]+ED[1]);

	    Vxc_Grid[0][MN] = Exc[0] + Exc[1];
	    Vxc_Grid[1][MN] = Exc[0] + Exc[1];
	  }

	  /* potential: correlation plus exchange contribution per spin */
	  else if (XC_P_switch==1){
	    XC_PW91C(ED,Ec_unif,Vc_unif);
	    Vxc_Grid[0][MN] = Vc_unif[0];
	    Vxc_Grid[1][MN] = Vc_unif[1];

	    XC_EX(1,2.0*ED[0],ED,Ex_unif,Vx_unif);
	    Vxc_Grid[0][MN] = Vxc_Grid[0][MN] + Vx_unif[0];

	    XC_EX(1,2.0*ED[1],ED,Ex_unif,Vx_unif);
	    Vxc_Grid[1][MN] = Vxc_Grid[1][MN] + Vx_unif[0];
	  }

	  /* energy density minus potential, per spin */
	  else if (XC_P_switch==2){

	    XC_PW91C(ED,Ec_unif,Vc_unif);

	    Vxc[0] = Vc_unif[0];
	    Vxc[1] = Vc_unif[1];
	    Exc[0] = Ec_unif[0];

	    XC_EX(1,2.0*ED[0],ED,Ex_unif,Vx_unif);
	    Vxc[0]  = Vxc[0] + Vx_unif[0];
	    Exc[1]  = 2.0*ED[0]*Ex_unif[0];

	    XC_EX(1,2.0*ED[1],ED,Ex_unif,Vx_unif);
	    Vxc[1] += Vx_unif[0];
	    Exc[1] += 2.0*ED[1]*Ex_unif[0];

	    Exc[1] = 0.5*Exc[1]/(ED[0]+ED[1]);

	    Vxc_Grid[0][MN] = Exc[0] + Exc[1] - Vxc[0];
	    Vxc_Grid[1][MN] = Exc[0] + Exc[1] - Vxc[1];
	  }
	}

	break;

	/******************************************************************
         GGA-PBE
         ref.
         J. P. Perdew, K. Burke, and M. Ernzerhof,
         Phys. Rev. Lett. 77, 3865 (1996).
	******************************************************************/

      case 4:

	/****************************************************
         ED[0]       density of up spin:     n_up   
         ED[1]       density of down spin:   n_down

         GDENS[0][0] derivative (x) of density of up spin
         GDENS[1][0] derivative (y) of density of up spin
         GDENS[2][0] derivative (z) of density of up spin
         GDENS[0][1] derivative (x) of density of down spin
         GDENS[1][1] derivative (y) of density of down spin
         GDENS[2][1] derivative (z) of density of down spin

         DEXDD[0]    d(fx)/d(n_up) 
         DEXDD[1]    d(fx)/d(n_down) 
         DECDD[0]    d(fc)/d(n_up) 
         DECDD[1]    d(fc)/d(n_down) 

         n'_up_x   = d(n_up)/d(x)
         n'_up_y   = d(n_up)/d(y)
         n'_up_z   = d(n_up)/d(z)
         n'_down_x = d(n_down)/d(x)
         n'_down_y = d(n_down)/d(y)
         n'_down_z = d(n_down)/d(z)
       
         DEXDGD[0][0] d(fx)/d(n'_up_x) 
         DEXDGD[1][0] d(fx)/d(n'_up_y) 
         DEXDGD[2][0] d(fx)/d(n'_up_z) 
         DEXDGD[0][1] d(fx)/d(n'_down_x) 
         DEXDGD[1][1] d(fx)/d(n'_down_y) 
         DEXDGD[2][1] d(fx)/d(n'_down_z) 

         DECDGD[0][0] d(fc)/d(n'_up_x) 
         DECDGD[1][0] d(fc)/d(n'_up_y) 
         DECDGD[2][0] d(fc)/d(n'_up_z) 
         DECDGD[0][1] d(fc)/d(n'_down_x) 
         DECDGD[1][1] d(fc)/d(n'_down_y) 
         DECDGD[2][1] d(fc)/d(n'_down_z) 
	****************************************************/

	ED[0] = Density_Grid[0][MN];
	ED[1] = Density_Grid[1][MN];

	/* the threshold is tested on the density without the partial core
	   correction, matching the test used in the gradient pass above */
	if ((ED[0]+ED[1])<den_min){
	  Vxc_Grid[0][MN] = 0.0;
	  Vxc_Grid[1][MN] = 0.0;

	  /* later add its derivatives */
	  if (XC_P_switch!=0){
	    dEXC_dGD[0][0][MN] = 0.0;
	    dEXC_dGD[0][1][MN] = 0.0;
	    dEXC_dGD[0][2][MN] = 0.0;

	    dEXC_dGD[1][0][MN] = 0.0;
	    dEXC_dGD[1][1][MN] = 0.0;
	    dEXC_dGD[1][2][MN] = 0.0;
	  }
	}
     
	else{

	  /* GDENS is indexed [axis][spin] while dDen_Grid is [spin][axis] */
	  GDENS[0][0] = dDen_Grid[0][0][MN];
	  GDENS[1][0] = dDen_Grid[0][1][MN];
	  GDENS[2][0] = dDen_Grid[0][2][MN];
	  GDENS[0][1] = dDen_Grid[1][0][MN];
	  GDENS[1][1] = dDen_Grid[1][1][MN];
	  GDENS[2][1] = dDen_Grid[1][2][MN];

	  if (PCC_switch==1) {
	    ED[0] += PCCDensity_Grid[MN];
	    ED[1] += PCCDensity_Grid[MN];
	  }

	  XC_PBE(ED, GDENS, Exc, DEXDD, DECDD, DEXDGD, DECDGD);

	  /* XC energy density */
	  if      (XC_P_switch==0){
	    Vxc_Grid[0][MN] = Exc[0] + Exc[1];
	    Vxc_Grid[1][MN] = Exc[0] + Exc[1];
	  }

	  /* XC potential */
	  else if (XC_P_switch==1){
	    Vxc_Grid[0][MN] = DEXDD[0] + DECDD[0];
	    Vxc_Grid[1][MN] = DEXDD[1] + DECDD[1];
	  }

	  /* XC energy density - XC potential */
	  else if (XC_P_switch==2){
	    Vxc_Grid[0][MN] = Exc[0] + Exc[1] - DEXDD[0] - DECDD[0];
	    Vxc_Grid[1][MN] = Exc[0] + Exc[1] - DEXDD[1] - DECDD[1];
	  }

	  /* later add its derivatives */
	  /* stored as dEXC_dGD[spin][axis]; consumed by the second
	     central-difference pass below */
	  if (XC_P_switch!=0){
	    dEXC_dGD[0][0][MN] = DEXDGD[0][0] + DECDGD[0][0];
	    dEXC_dGD[0][1][MN] = DEXDGD[1][0] + DECDGD[1][0];
	    dEXC_dGD[0][2][MN] = DEXDGD[2][0] + DECDGD[2][0];

	    dEXC_dGD[1][0][MN] = DEXDGD[0][1] + DECDGD[0][1];
	    dEXC_dGD[1][1][MN] = DEXDGD[1][1] + DECDGD[1][1];
	    dEXC_dGD[1][2][MN] = DEXDGD[2][1] + DECDGD[2][1];
	  }
	}

	break;

      } /* switch(XC_switch) */
    }   /* MN */

#pragma omp flush(dEXC_dGD)

  } /* #pragma omp parallel */

  /****************************************************
        calculate the second part of XC potential
               when GGA and XC_P_switch!=0
  ****************************************************/

  if (XC_switch==4 && XC_P_switch!=0){

    /* same index decoding and neighbour search as in the dDen_Grid pass,
       this time differencing dEXC_dGD */

#pragma omp parallel shared(XC_P_switch,Vxc_Grid,igtv,dEXC_dGD,Density_Grid,den_min,My_Cell0,My_Cell1,Num_Cells0,Ngrid2,Ngrid3) private(OMPID,Nthrds,Nprocs,nmax,n,i,j,k,ri,ri1,ri2,i1,i2,j1,j2,k1,k2,MN,MN1,MN2,up_x_a,up_y_a,up_z_a,dn_x_a,dn_y_a,dn_z_a,up_x_b,up_y_b,up_z_b,dn_x_b,dn_y_b,dn_z_b,up_x_c,up_y_c,up_z_c,dn_x_c,dn_y_c,dn_z_c,tmp0,tmp1)
    {

      OMPID = omp_get_thread_num();
      Nthrds = omp_get_num_threads();
      Nprocs = omp_get_num_procs();
      nmax = Num_Cells0*Ngrid2*Ngrid3; 

      for (n=OMPID*nmax/Nthrds; n<(OMPID+1)*nmax/Nthrds; n++){

	i = n/(Ngrid2*Ngrid3);
	j = (n-i*Ngrid2*Ngrid3)/Ngrid3;
	k = n - i*Ngrid2*Ngrid3 - j*Ngrid3; 
	ri = My_Cell1[i];

	/* find ri1, ri2, i1, and i2 */

	if (ri==0){
	  ri1 = Ngrid1 - 1;
	  ri2 = 1;        
	  i1 = My_Cell0[ri1];
	  i2 = My_Cell0[ri2];
	}
	else if (ri==(Ngrid1-1)){
	  ri1 = Ngrid1 - 2;
	  ri2 = 0;
	  i1 = My_Cell0[ri1];
	  i2 = My_Cell0[ri2];
	}      
	else{
	  ri1 = ri - 1;
	  ri2 = ri + 1;
	  i1 = My_Cell0[ri1];
	  i2 = My_Cell0[ri2];
	}

	if (i1!=-1 && i2!=-1){

	  /* find j1 and j2 */

	  if (j==0){
	    j1 = Ngrid2 - 1;
	    j2 = 1;
	  }
	  else if (j==(Ngrid2-1)){
	    j1 = Ngrid2 - 2;
	    j2 = 0;
	  }
	  else{
	    j1 = j - 1;
	    j2 = j + 1;
	  }

	  /* find k1 and k2 */

	  if (k==0){
	    k1 = Ngrid3 - 1;
	    k2 = 1;
	  }
	  else if (k==(Ngrid3-1)){
	    k1 = Ngrid3 - 2;
	    k2 = 0;
	  }
	  else{
	    k1 = k - 1;
	    k2 = k + 1;
	  }  

	  /* set MN */

	  MN = i*Ngrid2*Ngrid3 + j*Ngrid3 + k; 

	  /* set Vxc_Grid */

	  if ( den_min<(Density_Grid[0][MN]+Density_Grid[1][MN]) ){

	    /* a-axis */

	    MN1 = i1*Ngrid2*Ngrid3 + j*Ngrid3 + k;
	    MN2 = i2*Ngrid2*Ngrid3 + j*Ngrid3 + k;

	    up_x_a = dEXC_dGD[0][0][MN2] - dEXC_dGD[0][0][MN1];
	    up_y_a = dEXC_dGD[0][1][MN2] - dEXC_dGD[0][1][MN1];
	    up_z_a = dEXC_dGD[0][2][MN2] - dEXC_dGD[0][2][MN1];

	    dn_x_a = dEXC_dGD[1][0][MN2] - dEXC_dGD[1][0][MN1];
	    dn_y_a = dEXC_dGD[1][1][MN2] - dEXC_dGD[1][1][MN1];
	    dn_z_a = dEXC_dGD[1][2][MN2] - dEXC_dGD[1][2][MN1];

	    /* b-axis */

	    MN1 = i*Ngrid2*Ngrid3 + j1*Ngrid3 + k; 
	    MN2 = i*Ngrid2*Ngrid3 + j2*Ngrid3 + k; 

	    up_x_b = dEXC_dGD[0][0][MN2] - dEXC_dGD[0][0][MN1];
	    up_y_b = dEXC_dGD[0][1][MN2] - dEXC_dGD[0][1][MN1];
	    up_z_b = dEXC_dGD[0][2][MN2] - dEXC_dGD[0][2][MN1];

	    dn_x_b = dEXC_dGD[1][0][MN2] - dEXC_dGD[1][0][MN1];
	    dn_y_b = dEXC_dGD[1][1][MN2] - dEXC_dGD[1][1][MN1];
	    dn_z_b = dEXC_dGD[1][2][MN2] - dEXC_dGD[1][2][MN1];

	    /* c-axis */

	    MN1 = i*Ngrid2*Ngrid3 + j*Ngrid3 + k1; 
	    MN2 = i*Ngrid2*Ngrid3 + j*Ngrid3 + k2; 

	    up_x_c = dEXC_dGD[0][0][MN2] - dEXC_dGD[0][0][MN1];
	    up_y_c = dEXC_dGD[0][1][MN2] - dEXC_dGD[0][1][MN1];
	    up_z_c = dEXC_dGD[0][2][MN2] - dEXC_dGD[0][2][MN1];

	    dn_x_c = dEXC_dGD[1][0][MN2] - dEXC_dGD[1][0][MN1];
	    dn_y_c = dEXC_dGD[1][1][MN2] - dEXC_dGD[1][1][MN1];
	    dn_z_c = dEXC_dGD[1][2][MN2] - dEXC_dGD[1][2][MN1];

	    /* tmp0/tmp1: sum over the three components of the
	       central-differenced dEXC_dGD, i.e. its divergence, for up and
	       down spin */

	    /* up */

	    tmp0 = igtv[1][1]*up_x_a + igtv[1][2]*up_x_b + igtv[1][3]*up_x_c
	      + igtv[2][1]*up_y_a + igtv[2][2]*up_y_b + igtv[2][3]*up_y_c
	      + igtv[3][1]*up_z_a + igtv[3][2]*up_z_b + igtv[3][3]*up_z_c;
	    tmp0 = 0.5*tmp0;

	    /* down */

	    tmp1 = igtv[1][1]*dn_x_a + igtv[1][2]*dn_x_b + igtv[1][3]*dn_x_c
	      + igtv[2][1]*dn_y_a + igtv[2][2]*dn_y_b + igtv[2][3]*dn_y_c
	      + igtv[3][1]*dn_z_a + igtv[3][2]*dn_z_b + igtv[3][3]*dn_z_c;
	    tmp1 = 0.5*tmp1;

	    /* XC potential */

	    if (XC_P_switch==1){
	      Vxc_Grid[0][MN] -= tmp0; 
	      Vxc_Grid[1][MN] -= tmp1;
	    }

	    /* XC energy density - XC potential */
	    /* the potential enters with a minus sign here, so the correction
	       is added instead of subtracted */

	    else if (XC_P_switch==2){
	      Vxc_Grid[0][MN] += tmp0; 
	      Vxc_Grid[1][MN] += tmp1;
	    }

	  }
	}
      }

#pragma omp flush(Vxc_Grid)

    } /* #pragma omp parallel */
  } /* if (XC_switch==4 && XC_P_switch!=0) */

  /****************************************************
            In case of non-collinear spin DFT 
  ****************************************************/

  if (SpinP_switch==3 && XC_P_switch!=2){

#pragma omp parallel shared(Density_Grid,Vxc_Grid,My_NumGrid1) private(OMPID,Nthrds,Nprocs,MN,tmp0,tmp1,theta,phi,sit,cot,sip,cop)
    {

      OMPID = omp_get_thread_num();
      Nthrds = omp_get_num_threads();
      Nprocs = omp_get_num_procs();

      for (MN=OMPID*My_NumGrid1/Nthrds; MN<(OMPID+1)*My_NumGrid1/Nthrds; MN++){

	/* tmp0: average of the two spin channels, tmp1: half their
	   difference; theta/phi are read from Density_Grid[2] and [3] */
	tmp0 = 0.5*(Vxc_Grid[0][MN] + Vxc_Grid[1][MN]);
	tmp1 = 0.5*(Vxc_Grid[0][MN] - Vxc_Grid[1][MN]);
	theta = Density_Grid[2][MN];
	phi   = Density_Grid[3][MN];
	sit = sin(theta);
	cot = cos(theta);
	sip = sin(phi);
	cop = cos(phi);

	Vxc_Grid[0][MN] =  tmp0 + cot*tmp1;  /* Re Vxc11 */
	Vxc_Grid[1][MN] =  tmp0 - cot*tmp1;  /* Re Vxc22 */
	Vxc_Grid[2][MN] =  tmp1*sit*cop;     /* Re Vxc12 */
	Vxc_Grid[3][MN] = -tmp1*sit*sip;     /* Im Vxc12 */ 
      }

#pragma omp flush(Vxc_Grid)

    } /* #pragma omp parallel */ 
  }

  /*
  {
    int hN1,hN2,hN3,i;
    double Re11,Re22,Re12,Im12;

    hN1 = Ngrid1/2;
    hN2 = Ngrid2/2;
    hN3 = Ngrid3/2;

    for (i=0; i<Num_Cells0; i++){

    MN = i*Ngrid2*Ngrid3 + hN2*Ngrid3 + hN3;
 

    Re11 = Vxc_Grid[0][MN];
    Re22 = Vxc_Grid[1][MN];
    Re12 = Vxc_Grid[2][MN];
    Im12 = Vxc_Grid[3][MN];

    printf("MN=%4d %15.12f %15.12f %15.12f %15.12f\n",
           MN,Re11,Re22,Re12,Im12);
    }
  }


  MPI_Finalize();
  exit(0);
  */


  /****************************************************
   In case of GGA,
   free arrays
   double dEXC_dGD[2][3][My_NumGrid1]
   double dDen_Grid[2][3][My_NumGrid1]
  ****************************************************/

  /* mirrors the allocation at the top: dEXC_dGD exists only when
     XC_P_switch!=0 */
  if (XC_switch==4){

    for (k=0; k<=1; k++){
      for (i=0; i<3; i++){
        free(dDen_Grid[k][i]);
      }
      free(dDen_Grid[k]);
    }
    free(dDen_Grid);

    if (XC_P_switch!=0){
      for (k=0; k<=1; k++){
        for (i=0; i<3; i++){
          free(dEXC_dGD[k][i]);
        }
        free(dEXC_dGD[k]);
      }
      free(dEXC_dGD);
    }
  }
}
   /// MEX worker: trains a dictionary on the data in prhs[0].
   ///
   /// Inputs
   ///   prhs[0]  data matrix of element type T, dense or sparse (n x M)
   ///   prhs[1]  parameter struct (fields read below: numThreads, batchsize,
   ///            D or K, lambda, lambda2, iter, t0, mode, posAlpha, posD,
   ///            expand, modeD, whiten, clean, verbose, gamma1, gamma2, rho,
   ///            modeParam, batch, iter_updateD, plus the *_deprecated ones)
   ///   prhs[2]  optional struct with fields A (K x K), B (n x K) and iter,
   ///            used to construct the trainer from a previous state
   ///            (only read when nrhs == 3 and prhs[1] provides D)
   /// Outputs
   ///   plhs[0]  dictionary D (n x K)
   ///   plhs[1]  only when nlhs == 2: struct with fields A, B and iter
   ///
   /// Errors are reported through mexErrMsgTxt.
   /// NOTE(review): mexErrMsgTxt calls made after X / trainer have been
   /// allocated do not release them first.
   inline void callFunction(mxArray* plhs[], const mxArray*prhs[],
         const int nlhs,const int nrhs) {
      if (!mexCheckType<T>(prhs[0])) 
         mexErrMsgTxt("type of argument 1 is not consistent");

      if (!mxIsStruct(prhs[1])) 
         mexErrMsgTxt("argument 2 should be struct");

      if (nrhs == 3)
         if (!mxIsStruct(prhs[2])) 
            mexErrMsgTxt("argument 3 should be struct");

      // Wrap the input data: a sparse input is copied into int-indexed
      // arrays, a dense input is viewed in place.
      Data<T> *X;
      const mwSize* dimsX=mxGetDimensions(prhs[0]);
      int n=static_cast<int>(dimsX[0]);
      int M=static_cast<int>(dimsX[1]);
      if (mxIsSparse(prhs[0])) {
         double * X_v=static_cast<double*>(mxGetPr(prhs[0]));
         mwSize* X_r=mxGetIr(prhs[0]);
         mwSize* X_pB=mxGetJc(prhs[0]);
         mwSize* X_pE=X_pB+1;
         int* X_r2, *X_pB2, *X_pE2;
         T* X_v2;
         createCopySparse<T>(X_v2,X_r2,X_pB2,X_pE2,
               X_v,X_r,X_pB,X_pE,M);
         X = new SpMatrix<T>(X_v2,X_r2,X_pB2,X_pE2,n,M,X_pB2[M]);
      } else {
         T* prX = reinterpret_cast<T*>(mxGetPr(prhs[0]));
         X= new Matrix<T>(prX,n,M);
      }

      // Thread count: -1 (the default) means "all processors" when built
      // with OpenMP; without OpenMP it is forced to 1.
      int NUM_THREADS = getScalarStructDef<int>(prhs[1],"numThreads",-1);
#ifdef _OPENMP
      NUM_THREADS = NUM_THREADS == -1 ? omp_get_num_procs() : NUM_THREADS;
#else
      NUM_THREADS=1;
#endif 
      int batch_size = getScalarStructDef<int>(prhs[1],"batchsize",
            256*(NUM_THREADS+1));
      mxArray* pr_D = mxGetField(prhs[1],0,"D");
      Trainer<T>* trainer;

      // Build the trainer: from K alone, from an initial dictionary D, or
      // from D plus the saved state (A, B, iter) in prhs[2].
      if (!pr_D) {
         int K = getScalarStruct<int>(prhs[1],"K");
         trainer = new Trainer<T>(K,batch_size,NUM_THREADS);
      } else {
         T* prD = reinterpret_cast<T*>(mxGetPr(pr_D));
         const mwSize* dimsD=mxGetDimensions(pr_D);
         int nD=static_cast<int>(dimsD[0]);
         int K=static_cast<int>(dimsD[1]);
         if (n != nD) mexErrMsgTxt("sizes of D are not consistent");
         Matrix<T> D1(prD,n,K);
         if (nrhs == 3) {
            mxArray* pr_A = mxGetField(prhs[2],0,"A");
            if (!pr_A) mexErrMsgTxt("field A is not provided");
            T* prA = reinterpret_cast<T*>(mxGetPr(pr_A));
            const mwSize* dimsA=mxGetDimensions(pr_A);
            int xA=static_cast<int>(dimsA[0]);
            int yA=static_cast<int>(dimsA[1]);
            if (xA != K || yA != K) mexErrMsgTxt("Size of A is not consistent");
            Matrix<T> A(prA,K,K);

            mxArray* pr_B = mxGetField(prhs[2],0,"B");
            if (!pr_B) mexErrMsgTxt("field B is not provided");
            T* prB = reinterpret_cast<T*>(mxGetPr(pr_B));
            const mwSize* dimsB=mxGetDimensions(pr_B);
            int xB=static_cast<int>(dimsB[0]);
            int yB=static_cast<int>(dimsB[1]);
            if (xB != n || yB != K) mexErrMsgTxt("Size of B is not consistent");
            Matrix<T> B(prB,n,K);
            int iter = getScalarStruct<int>(prhs[2],"iter");
            trainer = new Trainer<T>(A,B,D1,iter,batch_size,NUM_THREADS);
         } else {
            trainer = new Trainer<T>(D1,batch_size,NUM_THREADS);
         }
      }

      // Learning parameters; lambda and iter are mandatory, the rest fall
      // back to the defaults given here.
      ParamDictLearn<T> param;
      param.lambda = getScalarStruct<T>(prhs[1],"lambda");
      param.lambda2 = getScalarStructDef<T>(prhs[1],"lambda2",10e-10);
      param.iter=getScalarStruct<int>(prhs[1],"iter");
      param.t0 = getScalarStructDef<T>(prhs[1],"t0",1e-5);
      param.mode =(constraint_type)getScalarStructDef<int>(prhs[1],"mode",PENALTY);
      param.posAlpha = getScalarStructDef<bool>(prhs[1],"posAlpha",false);
      param.posD = getScalarStructDef<bool>(prhs[1],"posD",false);
      param.expand= getScalarStructDef<bool>(prhs[1],"expand",false);
      param.modeD=(constraint_type_D)getScalarStructDef<int>(prhs[1],"modeD",L2);
      param.whiten = getScalarStructDef<bool>(prhs[1],"whiten",false);
      param.clean = getScalarStructDef<bool>(prhs[1],"clean",true);
      param.verbose = getScalarStructDef<bool>(prhs[1],"verbose",true);
      param.gamma1 = getScalarStructDef<T>(prhs[1],"gamma1",0);
      param.gamma2 = getScalarStructDef<T>(prhs[1],"gamma2",0);
      param.rho = getScalarStructDef<T>(prhs[1],"rho",T(1.0));
      param.stochastic = 
         getScalarStructDef<bool>(prhs[1],"stochastic_deprecated",
               false);
      param.modeParam = static_cast<mode_compute>(getScalarStructDef<int>(prhs[1],"modeParam",0));
      param.batch = getScalarStructDef<bool>(prhs[1],"batch",false);
      param.iter_updateD = getScalarStructDef<T>(prhs[1],"iter_updateD",param.batch ? 5 : 1);
      param.log = getScalarStructDef<bool>(prhs[1],"log_deprecated",
            false);
      if (param.log) {
         mxArray *stringData = mxGetField(prhs[1],0,
               "logName_deprecated");
         if (!stringData) 
            mexErrMsgTxt("Missing field logName_deprecated");
         int stringLength = mxGetN(stringData)+1;
         param.logName= new char[stringLength];
         mxGetString(stringData,param.logName,stringLength);
      }

      trainer->train(*X,param);
      if (param.log) {
         // logName was allocated with new[] above, so it must be released
         // with delete[]; mxFree is only valid for mxMalloc/mxCalloc memory.
         // The pointer is cleared so that any later cleanup of param (its
         // definition is not visible here) cannot free it a second time.
         delete[] param.logName;
         param.logName = NULL;
      }

      // First output: copy the learned dictionary into a fresh MATLAB array.
      Matrix<T> D;
      trainer->getD(D);
      int K  = D.n();
      plhs[0] = createMatrix<T>(n,K);
      T* prD2 = reinterpret_cast<T*>(mxGetPr(plhs[0]));
      Matrix<T> D2(prD2,n,K);
      D2.copy(D);

      // Optional second output: the trainer state (A, B, iter).
      if (nlhs == 2) {
         mwSize dims[1] = {1};
         int nfields=3; 
         const char *names[] = {"A", "B", "iter"};
         plhs[1]=mxCreateStructArray(1, dims,nfields, names);
         mxArray* prA = createMatrix<T>(K,K);
         T* pr_A= reinterpret_cast<T*>(mxGetPr(prA));
         Matrix<T> A(pr_A,K,K);
         trainer->getA(A);
         mxSetField(plhs[1],0,"A",prA);
         mxArray* prB = createMatrix<T>(n,K);
         T* pr_B= reinterpret_cast<T*>(mxGetPr(prB));
         Matrix<T> B(pr_B,n,K);
         trainer->getB(B);
         mxSetField(plhs[1],0,"B",prB);
         mxArray* priter = createScalar<T>();
         // Write through a T* like every other typed array in this
         // function: mxGetPr returns double*, and storing a double into a
         // scalar of element type T would overrun it when T is narrower.
         *reinterpret_cast<T*>(mxGetPr(priter)) =
            static_cast<T>(trainer->getIter());
         mxSetField(plhs[1],0,"iter",priter);
      }
      delete(trainer);
      delete(X);
   }
Ejemplo n.º 11
0
void Voronoi_Charge()
{
  double time0;
  int Mc_AN,Gc_AN,Mh_AN,h_AN,Gh_AN;
  int Cwan,GNc,GRc,Nog,Nh,MN,spin;
  double x,y,z,dx,dy,dz,fw;
  double Cxyz[4];
  double FuzzyW,sum0,sum1;
  double magx,magy,magz;
  double tmagx,tmagy,tmagz;
  double tden,tmag,theta,phi,rho,mag;
  double den0,den1,vol;
  double VC_S,T_VC0,T_VC1;
  double **VC,*Voronoi_Vol;
  double TStime,TEtime;
  double S_coordinate[3];
  int numprocs,myid,tag=999,ID;
  FILE *fp_VC;
  char file_VC[YOUSO10];
  char buf[fp_bsize];          /* setvbuf */

  MPI_Status stat;
  MPI_Request request;

  /* for OpenMP */
  int OMPID,Nthrds,Nprocs;

  MPI_Comm_size(mpi_comm_level1,&numprocs);
  MPI_Comm_rank(mpi_comm_level1,&myid);

  dtime(&TStime);
  if (myid==Host_ID) printf("\n<Voronoi_Charge>  calculate Voronoi charges\n");fflush(stdout);

  /*****************************************************
    allocation of array
  *****************************************************/

  VC = (double**)malloc(sizeof(double*)*4);
  for (spin=0; spin<4; spin++){
    VC[spin] = (double*)malloc(sizeof(double)*(atomnum+1));
  }

  Voronoi_Vol = (double*)malloc(sizeof(double)*(atomnum+1));

  /*****************************************************
            calculation of Voronoi charge
  *****************************************************/

#pragma omp parallel shared(S_coordinate,GridVol,VC,Voronoi_Vol,Density_Grid,SpinP_switch,MGridListAtom,atv,CellListAtom,GridListAtom,NumOLG,WhatSpecies,M2G,Matomnum) private(OMPID,Nthrds,Nprocs,Mc_AN,Gc_AN,Cwan,sum0,sum1,vol,tden,tmagx,tmagy,tmagz,Nog,GNc,GRc,Cxyz,x,y,z,FuzzyW,MN,den0,den1,theta,phi,rho,mag,magx,magy,magz,tmag)
  {

    /* get info. on OpenMP */ 

    OMPID = omp_get_thread_num();
    Nthrds = omp_get_num_threads();
    Nprocs = omp_get_num_procs();

    for (Mc_AN=1+OMPID; Mc_AN<=Matomnum; Mc_AN+=Nthrds){

      Gc_AN = M2G[Mc_AN];    
      Cwan = WhatSpecies[Gc_AN];

      sum0 = 0.0;
      sum1 = 0.0;
      vol  = 0.0;

      tden  = 0.0;
      tmagx = 0.0;
      tmagy = 0.0;
      tmagz = 0.0;

      for (Nog=0; Nog<NumOLG[Mc_AN][0]; Nog++){

	/* calculate fuzzy weight */

	GNc = GridListAtom[Mc_AN][Nog];
	GRc = CellListAtom[Mc_AN][Nog];

	Get_Grid_XYZ(GNc,Cxyz);
	x = Cxyz[1] + atv[GRc][1];
	y = Cxyz[2] + atv[GRc][2]; 
	z = Cxyz[3] + atv[GRc][3];
	FuzzyW = Fuzzy_Weight(Gc_AN,Mc_AN,0,x,y,z);

	/* find charge */

	MN = MGridListAtom[Mc_AN][Nog];

	if (SpinP_switch<=1){

	  den0  = Density_Grid[0][MN];
	  den1  = Density_Grid[1][MN];

	  /* sum density */
	  sum0 += den0*FuzzyW; 
	  sum1 += den1*FuzzyW; 

	  /* sum volume */
          vol += FuzzyW;

	}

	else{

	  den0  = Density_Grid[0][MN];
	  den1  = Density_Grid[1][MN];
	  theta = Density_Grid[2][MN];
	  phi   = Density_Grid[3][MN];

	  rho = den0 + den1;
	  mag = den0 - den1;
	  magx = mag*sin(theta)*cos(phi);
	  magy = mag*sin(theta)*sin(phi);
	  magz = mag*cos(theta);

	  /* sum density */
 
	  tden  +=  rho*FuzzyW; 
	  tmagx += magx*FuzzyW; 
	  tmagy += magy*FuzzyW; 
	  tmagz += magz*FuzzyW; 

	  /* sum volume */
          vol += FuzzyW;
	}

      }

      if (SpinP_switch<=1){
	VC[0][Gc_AN] = sum0*GridVol; 
	VC[1][Gc_AN] = sum1*GridVol;
      }

      else {

	tmag = sqrt(tmagx*tmagx + tmagy*tmagy + tmagz*tmagz); 
	sum0 = 0.5*(tden + tmag);
	sum1 = 0.5*(tden - tmag);

	xyz2spherical( tmagx,tmagy,tmagz, 0.0,0.0,0.0, S_coordinate ); 

	VC[0][Gc_AN] = sum0*GridVol; 
	VC[1][Gc_AN] = sum1*GridVol;
	VC[2][Gc_AN] = S_coordinate[1];
	VC[3][Gc_AN] = S_coordinate[2];
      }

      Voronoi_Vol[Gc_AN] = vol*GridVol*BohrR*BohrR*BohrR;

    } /* Mc_AN */

  } /* #pragma omp parallel */

  /*****************************************************
    MPI VC
  *****************************************************/

  for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){
    ID = G2ID[Gc_AN];
    MPI_Bcast(&VC[0][Gc_AN], 1, MPI_DOUBLE, ID, mpi_comm_level1);
  }

  for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){
    ID = G2ID[Gc_AN];
    MPI_Bcast(&VC[1][Gc_AN], 1, MPI_DOUBLE, ID, mpi_comm_level1);
  }

  if (SpinP_switch==3){

    for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){
      ID = G2ID[Gc_AN];
      MPI_Bcast(&VC[2][Gc_AN], 1, MPI_DOUBLE, ID, mpi_comm_level1);
    }

    for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){
      ID = G2ID[Gc_AN];
      MPI_Bcast(&VC[3][Gc_AN], 1, MPI_DOUBLE, ID, mpi_comm_level1);
    }
  }

  for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){
    ID = G2ID[Gc_AN];
    MPI_Bcast(&Voronoi_Vol[Gc_AN], 1, MPI_DOUBLE, ID, mpi_comm_level1);
  }

  VC_S = 0.0;
  T_VC0 = 0.0;
  T_VC1 = 0.0;
  for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){
    VC_S += VC[0][Gc_AN] - VC[1][Gc_AN];  
    T_VC0 += VC[0][Gc_AN];
    T_VC1 += VC[1][Gc_AN];
  }

  /****************************************
   file, *.VC
  ****************************************/

  if ( myid==Host_ID ){

    sprintf(file_VC,"%s%s.VC",filepath,filename);

    if ((fp_VC = fopen(file_VC,"w")) != NULL){

#ifdef xt3
      setvbuf(fp_VC,buf,_IOFBF,fp_bsize);  /* setvbuf */
#endif

      fprintf(fp_VC,"\n");
      fprintf(fp_VC,"***********************************************************\n");
      fprintf(fp_VC,"***********************************************************\n");
      fprintf(fp_VC,"                     Voronoi charges                       \n");
      fprintf(fp_VC,"***********************************************************\n");
      fprintf(fp_VC,"***********************************************************\n\n");

      fprintf(fp_VC,"  Sum of Voronoi charges for up    = %15.12f\n", T_VC0);
      fprintf(fp_VC,"  Sum of Voronoi charges for down  = %15.12f\n", T_VC1);
      fprintf(fp_VC,"  Sum of Voronoi charges for total = %15.12f\n\n",
              T_VC0+T_VC1);

      fprintf(fp_VC,"  Total spin magnetic moment (muB) by Voronoi charges  = %15.12f\n\n",VC_S);

      if (SpinP_switch<=1){

	fprintf(fp_VC,"                     Up spin      Down spin     Sum           Diff       Voronoi Volume (Ang.^3)\n");
	for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){
	  fprintf(fp_VC,"       Atom=%4d  %12.9f %12.9f  %12.9f  %12.9f  %12.9f\n",
		  Gc_AN, VC[0][Gc_AN], VC[1][Gc_AN],
		  VC[0][Gc_AN] + VC[1][Gc_AN],
		  VC[0][Gc_AN] - VC[1][Gc_AN],
                  Voronoi_Vol[Gc_AN]);
	}
      }

      else{
	fprintf(fp_VC,"                     Up spin      Down spin     Sum           Diff        Theta(Deg)   Phi(Deg)   Voronoi Volume (Ang.^3)\n");
	for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){
	  fprintf(fp_VC,"       Atom=%4d  %12.9f %12.9f  %12.9f  %12.9f  %8.4f    %8.4f   %12.9f\n",
		  Gc_AN, VC[0][Gc_AN], VC[1][Gc_AN],
		  VC[0][Gc_AN] + VC[1][Gc_AN],
		  VC[0][Gc_AN] - VC[1][Gc_AN],
                  VC[2][Gc_AN]/PI*180.0,VC[3][Gc_AN]/PI*180.0,
                  Voronoi_Vol[Gc_AN]);
	}
      }

      fclose(fp_VC);
    }
    else{
      printf("Failure of saving the VC file.\n");
    }

  }

  /*****************************************************
    freeing of array
  *****************************************************/

  for (spin=0; spin<4; spin++){
    free(VC[spin]);
  }
  free(VC);

  free(Voronoi_Vol);

  /* for time */
  dtime(&TEtime);
  time0 = TEtime - TStime;

}
Ejemplo n.º 12
0
//compact B
// MEX entry point.
//
// Reads the input matrix (MAT_Z), the list of dictionaries (MAT_DIC) and the
// per-partition code count (N_SPARSITY), allocates the uint8 output array
// BINARY_REPRESENTATION, configures an IndexEncoding solver from the fields of
// the PARAMETER struct and finally runs mp.Solve().
//
// NOTE(review): MAT_Z, MAT_DIC, N_SPARSITY, PARAMETER and BINARY_REPRESENTATION
// are macros defined outside this block; presumably they alias entries of
// prhs/plhs -- confirm against their definitions.
void mexFunction(int nlhs, mxArray *plhs[], 
				 int nrhs, const mxArray *prhs[])
{
	// Ask OpenMP for as many threads as the runtime reports processors.
	int num_thd = omp_get_num_procs();
	omp_set_num_threads(num_thd);

	SMART_ASSERT(nrhs == 4).Exit();

	SMatrix<double> matZ;
	Vector<SMatrix<double> > vecmatDictionary;
	int num_dic;

	mexConvert(MAT_Z, matZ);
	mexConvert(MAT_DIC, vecmatDictionary);
	mexConvert(N_SPARSITY, num_dic);
	SMART_ASSERT(num_dic > 0)(num_dic).Exit();

	int num_partitions = vecmatDictionary.size();

	// Output array: (num_dic * num_partitions) x matZ.Rows(), one byte per code.
	mwSize size[2];
	size[0] = num_dic * num_partitions;
	size[1] = matZ.Rows();

	//BINARY_REPRESENTATION = mxCreateNumericArray(2, size, mxINT16_CLASS, mxREAL);
	BINARY_REPRESENTATION = mxCreateNumericArray(2, size, mxUINT8_CLASS, mxREAL);
	SMatrix<CodeType> matRepresentation;
	mexConvert(BINARY_REPRESENTATION, matRepresentation);

	// method:
	// 2: ock-means described in the paper
	// others: jck-means
	IndexEncoding mp;

	// The solver is only configured from PARAMETER when more than one code
	// per partition is requested.
	if (num_dic > 1)
	{
		SMART_ASSERT(mxIsEmpty(PARAMETER) == false).Exit();

		int is_initialize;
		mexConvert(mxGetField(PARAMETER, 0, "is_initialize"), is_initialize);

		// NOTE(review): type_encoding is computed from "is_ock" but never handed
		// to mp in this function; only the "encoding_type" field below actually
		// selects the encoder. Confirm whether that is intended.
		TypeEncoding type_encoding = Type_gk_means;
		int is_ock = 0;
		{
			mxArray* p2 = mxGetField(PARAMETER, 0, "is_ock");
			if (p2)
			{
				mexConvert(p2, is_ock);
				if (is_ock)
				{
					type_encoding = Type_ock;
				}
			}
		}
		{
			mxArray* p2 = mxGetField(PARAMETER, 0, "encoding_type");
			if (p2)
			{
				string str_encoding_type;
				mexConvert(p2, str_encoding_type);

				// "is_ock" and "encoding_type" must not contradict each other.
				if (is_ock)
				{
					SMART_ASSERT(str_encoding_type == "ock").Exit();
				}

				if (str_encoding_type == "gkmeans")
				{
					int num_grouped;
					mp.SetEncodingType(Type_gk_means);
					mexConvert(mxGetField(PARAMETER, 0, "num_grouped"), num_grouped);
					mp.SetNumberGroup(num_grouped);
				}
				else if (str_encoding_type == "ock")
				{
					mp.SetEncodingType(Type_ock);
					int num_can;
					mexConvert(mxGetField(PARAMETER, 0, "num_can"), num_can);
					mp.SetCandidateNumber(num_can);
				}
				else if (str_encoding_type == "additive_quantization")
				{
					mp.SetEncodingType(Type_additive_quantization);
					int num_can;
					mexConvert(mxGetField(PARAMETER, 0, "num_can"), num_can);
					mp.SetCandidateNumber(num_can);
				}
				else
				{
					// Unknown encoder name: abort and report the offending value.
					SMART_ASSERT(0)(str_encoding_type).Exit();
				}
			}
		}

		mp.SetIsInitialize(is_initialize);

		if (!is_initialize)
		{
			// Seed the output with the caller-supplied codes from a previous run.
			SMatrix<CodeType> mat_old;
			mexConvert(mxGetField(PARAMETER, 0, "old_codes"), mat_old);

			// The destination was sized above from num_dic/num_partitions/matZ;
			// refuse to copy more codes than it can hold instead of overrunning it.
			SMART_ASSERT(mat_old.Rows() * mat_old.Cols() <=
				matRepresentation.Rows() * matRepresentation.Cols()).Exit();

			memcpy(matRepresentation.Ptr(), mat_old.Ptr(), sizeof(CodeType) * mat_old.Rows() * mat_old.Cols());
		}
	}

	mp.Solve(matZ, vecmatDictionary, num_dic, matRepresentation);
}
Ejemplo n.º 13
0
/*
 * Registers every program option together with its default value, then
 * attaches descriptions and one-letter shortcuts to a subset of them.
 *
 * Registration stops at the first option_add_*() call that does not return
 * ERR_SUCCESS; that status is what the function returns. The description and
 * shortcut calls are issued regardless of the registration status.
 */
static ERR_VALUE _init_default_values()
{
	ERR_VALUE status = ERR_INTERNAL_ERROR;

/* Perform one registration only while all earlier ones have succeeded. */
#define REGISTER_IF_OK_(call) \
	do { if (status == ERR_SUCCESS) status = (call); } while (0)
/* Attach the matching <OPTION>_DESC text to an option. */
#define DESCRIBE_(opt) option_set_description_const(opt, opt##_DESC)

	status = option_add_UInt32(PROGRAM_OPTION_KMERSIZE, 5);
	REGISTER_IF_OK_(option_add_String(PROGRAM_OPTION_SEQFILE, "\0"));
	REGISTER_IF_OK_(option_add_UInt64(PROGRAM_OPTION_SEQSTART, (uint64_t)-1));
	REGISTER_IF_OK_(option_add_UInt32(PROGRAM_OPTION_SEQLEN, 100));
	REGISTER_IF_OK_(option_add_UInt32(PROGRAM_OPTION_TEST_STEP, 1500));
	REGISTER_IF_OK_(option_add_UInt32(PROGRAM_OPTION_THRESHOLD, 0));
	REGISTER_IF_OK_(option_add_String(PROGRAM_OPTION_READFILE, "\0"));
	REGISTER_IF_OK_(option_add_String(PROGRAM_OPTION_OUTPUT_DIRECTORY, "."));
	REGISTER_IF_OK_(option_add_String(PROGRAM_OPTION_VCFFILE, "\0"));
	REGISTER_IF_OK_(option_add_Int32(PROGRAM_OPTION_OMP_THREADS, omp_get_num_procs()));
	REGISTER_IF_OK_(option_add_UInt8(PROGRAM_OPTION_READ_POS_QUALITY, 10));
	REGISTER_IF_OK_(option_add_Boolean(PROGRAM_OPTION_NO_CONNECT_REFSEQ, FALSE));
	REGISTER_IF_OK_(option_add_Boolean(PROGRAM_OPTION_NO_CONNECT_READS, FALSE));
	REGISTER_IF_OK_(option_add_Boolean(PROGRAM_OPTION_NO_BUBBLE_MERGING, FALSE));
	REGISTER_IF_OK_(option_add_Boolean(PROGRAM_OPTION_NO_LINEAR_SHRINK, FALSE));
	REGISTER_IF_OK_(option_add_Boolean(PROGRAM_OPTION_NO_HELPER_VERTICES, FALSE));
	REGISTER_IF_OK_(option_add_UInt32(PROGRAM_OPTION_MISSING_EDGE_PENALTY, 3));
	REGISTER_IF_OK_(option_add_UInt32(PROGRAM_OPTION_BACKWARD_REFSEQ_PENALTY, 2));
	REGISTER_IF_OK_(option_add_UInt32(PROGRAM_OPTION_MAX_PATHS, 10));
	REGISTER_IF_OK_(option_add_UInt32(PROGRAM_OPTION_READ_MAX_ERROR_RATE, 20));

	/* Help texts (only these nine options carry one here). */
	DESCRIBE_(PROGRAM_OPTION_KMERSIZE);
	DESCRIBE_(PROGRAM_OPTION_SEQFILE);
	DESCRIBE_(PROGRAM_OPTION_SEQSTART);
	DESCRIBE_(PROGRAM_OPTION_SEQLEN);
	DESCRIBE_(PROGRAM_OPTION_TEST_STEP);
	DESCRIBE_(PROGRAM_OPTION_THRESHOLD);
	DESCRIBE_(PROGRAM_OPTION_READFILE);
	DESCRIBE_(PROGRAM_OPTION_OUTPUT_DIRECTORY);
	DESCRIBE_(PROGRAM_OPTION_VCFFILE);

	/* Single-character command-line shortcuts. */
	option_set_shortcut(PROGRAM_OPTION_KMERSIZE, 'k');
	option_set_shortcut(PROGRAM_OPTION_SEQFILE, 'f');
	option_set_shortcut(PROGRAM_OPTION_SEQSTART, 'S');
	option_set_shortcut(PROGRAM_OPTION_SEQLEN, 'l');
	option_set_shortcut(PROGRAM_OPTION_TEST_STEP, 'e');
	option_set_shortcut(PROGRAM_OPTION_THRESHOLD, 'w');
	option_set_shortcut(PROGRAM_OPTION_READFILE, 'F');
	option_set_shortcut(PROGRAM_OPTION_OUTPUT_DIRECTORY, 'o');
	option_set_shortcut(PROGRAM_OPTION_VCFFILE, 'v');

#undef DESCRIBE_
#undef REGISTER_IF_OK_

	return status;
}
Ejemplo n.º 14
0
/*
 * Program entry point.
 *
 * Invocation: <program> <command> [options]. argv[1] selects the command
 * ("help", "repair", "rfreq" or "call"); everything from argv[2] on is handed
 * to the option parser.
 *
 *   help   - prints the option help.
 *   repair - loads the reference sequence, runs repair_reads_in_parallel() on
 *            every valid active region and writes the reads whose fix ratio is
 *            within ReadMaxErrorRate to stdout in SAM format.
 *   rfreq  - runs kmer_freq_distribution() on the loaded reads.
 *   call   - loads the reference sequence, runs
 *            process_active_region_in_parallel() on every valid active region,
 *            merges the per-thread variant calls and, when a VCF file name was
 *            given, prints them to that file.
 *
 * Returns the last ERR_VALUE status (ERR_INTERNAL_ERROR when no command was
 * given).
 *
 * NOTE(review): the per-thread arrays below are sized with
 * omp_get_num_procs() while the thread count is set from po.OMPThreads;
 * confirm the workers never index them with a thread id >= the processor
 * count.
 */
int main(int argc, char *argv[])
{
	ERR_VALUE ret = ERR_INTERNAL_ERROR;

	/* argv[1] is read as the command and (argc - 2, argv + 2) is passed to the
	   option parser; without a command argv[1] would be NULL. */
	if (argc < 2) {
		fprintf(stderr, "Usage: <program> <help|repair|rfreq|call> [options]\n");
		return ret;
	}

	utils_allocator_init(omp_get_num_procs());
	omp_init_lock(&_readCoverageLock);
#ifdef _MSC_VER
	uint64_t startTime = GetTickCount64();
#endif
	ret = options_module_init(37);
	if (ret == ERR_SUCCESS) {
		ret = _init_default_values();
		if (ret == ERR_SUCCESS) {
			ret = options_parse_command_line(argc - 2, argv + 2);
			if (ret == ERR_SUCCESS) {
				PROGRAM_OPTIONS po;
				PROGRAM_STATISTICS st;

				memset(&st, 0, sizeof(st));
				ret = _capture_program_options(&po);
				if (ret == ERR_SUCCESS) {
					omp_set_num_threads(po.OMPThreads);
					const char *cmd = argv[1];
					/* sizeof("literal") includes the terminator, so these are exact matches. */
					if (strncmp(cmd, "help", sizeof("help")) == 0) {
						options_print_help();
					} else if (strncmp(cmd, "repair", sizeof("repair")) == 0) {
						size_t refSeqLen = 0;
						FASTA_FILE seqFile;
						char *rsFasta = NULL;

						ret = fasta_load(po.RefSeqFile, &seqFile);
						if (ret == ERR_SUCCESS) {
							ret = fasta_read_seq(&seqFile, &rsFasta, &refSeqLen);
							po.ReferenceSequence = rsFasta;
							if (ret != ERR_SUCCESS)
								fasta_free(&seqFile);
						}

						if (ret == ERR_SUCCESS) {
							ret = utils_calloc(omp_get_num_procs(), sizeof(PUTILS_LOOKASIDE), &_vertexLAs);
							if (ret == ERR_SUCCESS)
								ret = utils_calloc(omp_get_num_procs(), sizeof(PUTILS_LOOKASIDE), &_edgeLAs);

							if (ret == ERR_SUCCESS) {
								ret = utils_calloc(omp_get_num_procs(), sizeof(GEN_ARRAY_ONE_READ), &po.ReadSubArrays);
								if (ret == ERR_SUCCESS) {
									const size_t numThreads = omp_get_num_procs();
									for (size_t i = 0; i < numThreads; ++i) {
										dym_array_init_ONE_READ(po.ReadSubArrays + i, 140);
										_vertexLAs[i] = NULL;
										_edgeLAs[i] = NULL;
									}

									size_t regionCount = 0;
									PACTIVE_REGION regions = NULL;

									ret = input_refseq_to_regions(po.ReferenceSequence, refSeqLen, &regions, &regionCount);
									if (ret == ERR_SUCCESS) {
										const ACTIVE_REGION *pa = NULL;

										/* First pass: count the work items for progress reporting. */
										pa = regions;
										for (size_t i = 0; i < regionCount; ++i) {
											if (pa->Type == artValid && pa->Length >= po.RegionLength)
												_activeRegionCount += (pa->Length / po.TestStep);

											++pa;
										}

										/* Second pass: process every sufficiently long valid region. */
										_activeRegionProcessed = 0;
										pa = regions;
										for (size_t i = 0; i < regionCount; ++i) {
											if (pa->Type == artValid && pa->Length >= po.RegionLength)
												repair_reads_in_parallel(pa, &po);

											++pa;
										}

										input_free_regions(regions, regionCount);
									}

									/* Emit only reads whose percentage of fixes is within the limit. */
									PONE_READ r = po.Reads;
									for (size_t i = 0; i < po.ReadCount; ++i) {
										if (r->NumberOfFixes * 100 / r->ReadSequenceLen <= po.ParseOptions.ReadMaxErrorRate) {
											read_quality_encode(r);
											read_write_sam(stdout, r);
											read_quality_decode(r);
										}

										++r;
									}

									utils_free(rsFasta);
									int i = 0;
#pragma omp parallel for shared (po)
									for (i = 0; i < (int) numThreads; ++i)
										dym_array_finit_ONE_READ(po.ReadSubArrays + i);

									utils_free(po.ReadSubArrays);
								}
							}

							utils_free(_edgeLAs);
							utils_free(_vertexLAs);
							fasta_free(&seqFile);
						}
					} else if (strncmp(cmd, "rfreq", sizeof("rfreq")) == 0) {
						kmer_freq_distribution(&po, po.KMerSize, po.Reads, po.ReadCount);
					} else if (strncmp(cmd, "call", sizeof("call")) == 0) {
						fprintf(stderr, "K-mer size:                 %u\n", po.KMerSize);
						fprintf(stderr, "Active region length:       %u\n", po.RegionLength);
						fprintf(stderr, "Reference:                  %s\n", po.RefSeqFile);
						fprintf(stderr, "Reads:                      %u\n", po.ReadCount);
						fprintf(stderr, "Read coverage threshold:    %u\n", po.Threshold);
						fprintf(stderr, "Min. read position quality: %u\n", po.ReadPosQuality);
						fprintf(stderr, "OpenMP thread count:        %i\n", po.OMPThreads);
						fprintf(stderr, "Output VCF file:            %s\n", po.VCFFile);
						ret = paired_reads_init();
						if (ret == ERR_SUCCESS) {
							size_t refSeqLen = 0;
							FASTA_FILE seqFile;
							char *rsFasta = NULL;

							ret = fasta_load(po.RefSeqFile, &seqFile);
							if (ret == ERR_SUCCESS) {
								ret = fasta_read_seq(&seqFile, &rsFasta, &refSeqLen);
								po.ReferenceSequence = rsFasta;
								if (ret != ERR_SUCCESS)
									fasta_free(&seqFile);
							}

							if (ret == ERR_SUCCESS) {
								/* The VCF output is optional: an empty name means no file. */
								po.VCFFileHandle = NULL;
								if (*po.VCFFile != '\0') {
									ret = utils_fopen(po.VCFFile, FOPEN_MODE_WRITE, &po.VCFFileHandle);
									if (ret == ERR_SUCCESS)
										dym_array_init_VARIANT_CALL(&po.VCArray, 140);
								}

								if (ret == ERR_SUCCESS) {
									/* Each allocation is attempted only if the previous one
									   succeeded, so a failure is never masked by a later
									   success and the arrays are not used unallocated. */
									ret = utils_calloc(omp_get_num_procs(), sizeof(PUTILS_LOOKASIDE), &_vertexLAs);
									if (ret == ERR_SUCCESS)
										ret = utils_calloc(omp_get_num_procs(), sizeof(PUTILS_LOOKASIDE), &_edgeLAs);

									if (ret == ERR_SUCCESS)
										ret = utils_calloc(omp_get_num_procs(), sizeof(GEN_ARRAY_VARIANT_CALL), &po.VCSubArrays);

									if (ret == ERR_SUCCESS) {
										ret = utils_calloc(omp_get_num_procs(), sizeof(GEN_ARRAY_ONE_READ), &po.ReadSubArrays);
										if (ret == ERR_SUCCESS) {
											const size_t numThreads = omp_get_num_procs();
											for (size_t i = 0; i < numThreads; ++i) {
												dym_array_init_VARIANT_CALL(po.VCSubArrays + i, 140);
												dym_array_init_ONE_READ(po.ReadSubArrays + i, 140);
												_vertexLAs[i] = NULL;
												_edgeLAs[i] = NULL;
											}

											size_t regionCount = 0;
											PACTIVE_REGION regions = NULL;

											ret = input_refseq_to_regions(po.ReferenceSequence, refSeqLen, &regions, &regionCount);
											if (ret == ERR_SUCCESS) {
												const ACTIVE_REGION *pa = NULL;

												/* First pass: count the work items for progress reporting. */
												pa = regions;
												for (size_t i = 0; i < regionCount; ++i) {
													if (pa->Type == artValid && pa->Length >= po.RegionLength)
														_activeRegionCount += (pa->Length / po.TestStep);

													++pa;
												}

												/* Second pass: process every sufficiently long valid region. */
												_activeRegionProcessed = 0;
												pa = regions;
												for (size_t i = 0; i < regionCount; ++i) {
													if (pa->Type == artValid && pa->Length >= po.RegionLength)
														process_active_region_in_parallel(pa, &po);

													++pa;
												}

												input_free_regions(regions, regionCount);
											}

											utils_free(rsFasta);
											/* Collect the per-thread results into the single output array. */
											ret = vc_array_merge(&po.VCArray, po.VCSubArrays, numThreads);
											int i = 0;
#pragma omp parallel for shared(po)
											for (i = 0; i < (int) numThreads; ++i) {
												dym_array_finit_ONE_READ(po.ReadSubArrays + i);
												vc_array_finit(po.VCSubArrays + i);
											}

											utils_free(po.ReadSubArrays);
										}

										utils_free(po.VCSubArrays);
									}

									utils_free(_edgeLAs);
									utils_free(_vertexLAs);

									if (po.VCFFileHandle != NULL) {
										if (ret == ERR_SUCCESS)
											vc_array_print(po.VCFFileHandle, &po.VCArray);

										vc_array_finit(&po.VCArray);
										utils_fclose(po.VCFFileHandle);
									}

								}

								fasta_free(&seqFile);
							}

							printf("Read coverage: %lf\n", _readBaseCount / _totalRegionLength );
							paired_reads_finit();
						}
					}
				}
			}
		}
	
		options_module_finit();
	}

#ifdef _MSC_VER
	uint64_t endTime = GetTickCount64();
	fprintf(stderr, "Time: %I64u s\n", (endTime - startTime) / 1000);
#endif
	omp_destroy_lock(&_readCoverageLock);

	return ret;
}
Ejemplo n.º 15
0
Archivo: test1.c Proyecto: florianl/lis
/*
 * LIS test driver: reads a linear system, solves it and writes the results.
 *
 * Command line:
 *   argv[1] matrix file (read with lis_input, may also carry b and x)
 *   argv[2] right-hand-side setting: "0", "1" or "2", or otherwise a file
 *           name passed to lis_input_vector
 *   argv[3] output file for the solution vector
 *   argv[4] output file for the residual history
 *
 * When the matrix file supplies no right-hand side:
 *   "0" aborts, "1" sets b to all ones, any other setting sets b = A * (1,...,1).
 *
 * Returns 0 on success; CHKERR leaves the function on any reported error.
 */
LIS_INT main(LIS_INT argc, char* argv[])
{
	LIS_MATRIX A0,A;
	LIS_VECTOR x,b,u;
	LIS_SOLVER solver;
	LIS_INT	nprocs,my_rank;
	int int_nprocs,int_my_rank;
	LIS_INT nsol,rhs,len;
	LIS_INT	err,iter,iter_double,iter_quad;
	double time,itime,ptime,p_c_time,p_i_time;
	LIS_REAL resid;
	char solvername[128];

	LIS_DEBUG_FUNC_IN;

	lis_initialize(&argc, &argv);

	#ifdef USE_MPI
		MPI_Comm_size(MPI_COMM_WORLD,&int_nprocs);
		MPI_Comm_rank(MPI_COMM_WORLD,&int_my_rank);
		nprocs = int_nprocs;
		my_rank = int_my_rank;
	#else
		nprocs  = 1;
		my_rank = 0;
	#endif

	if( argc < 5 )
	{
		if( my_rank==0 )
		{
			printf("Usage: %s matrix_filename rhs_setting solution_filename rhistory_filename [options]\n", argv[0]);
		}
		CHKERR(1);
	}

	/* A single character '0', '1' or '2' selects a built-in right-hand side;
	   anything else is treated as a vector file name (rhs = -1). */
	len = (LIS_INT)strlen(argv[2]);
	if( len==1 )
	{
		if( argv[2][0]=='0' || argv[2][0]=='1' || argv[2][0]=='2' )
		{
			rhs = atoi(argv[2]);
		}
		else
		{
			rhs = -1;
		}
	}
	else
	{
		rhs = -1;
	}

	if( my_rank==0 )
	{
		printf("\n");
#ifdef _LONG__LONG
		printf("number of processes = %lld\n",nprocs);
#else
		printf("number of processes = %d\n",nprocs);
#endif
	}

#ifdef _OPENMP
	if( my_rank==0 )
	{
		/* The OpenMP queries return plain int whatever LIS_INT is, so %d is
		   the matching conversion in every build configuration. */
		printf("max number of threads = %d\n",omp_get_num_procs());
		printf("number of threads = %d\n",omp_get_max_threads());
	}
#endif

	/* read matrix and vectors from file */
	err = lis_matrix_create(LIS_COMM_WORLD,&A); CHKERR(err);
	err = lis_vector_create(LIS_COMM_WORLD,&b); CHKERR(err);
	err = lis_vector_create(LIS_COMM_WORLD,&x); CHKERR(err);
	err = lis_input(A,b,x,argv[1]);
	CHKERR(err);

	/* convert the matrix to CSR storage and continue with the converted copy */
	err = lis_matrix_duplicate(A,&A0);
	CHKERR(err);
	lis_matrix_set_type(A0,LIS_MATRIX_CSR);
	err = lis_matrix_convert(A,A0);
	CHKERR(err);
	lis_matrix_destroy(A);
	A = A0;

	err = lis_vector_duplicate(A,&u);
	CHKERR(err);
	if( lis_vector_is_null(b) )
	{
		/* the input file carried no right-hand side: build one */
		lis_vector_destroy(b);
		err = lis_vector_duplicate(A,&b);
		CHKERR(err);
		if( rhs==0 )
		{
			CHKERR(1);
		}
		else if( rhs==1 )
		{
			err = lis_vector_set_all(1.0,b);
			CHKERR(err);
		}
		else
		{
			/* b = A * (1,...,1) */
			err = lis_vector_set_all(1.0,u);
			CHKERR(err);
			lis_matvec(A,u,b);
		}
	}
	if( rhs==-1 )
	{
		err = lis_input_vector(b,argv[2]);
		CHKERR(err);
	}
	if( lis_vector_is_null(x) )
	{
		lis_vector_destroy(x);
		err = lis_vector_duplicate(A,&x);
		CHKERR(err);
	}

	err = lis_solver_create(&solver); CHKERR(err);
	lis_solver_set_option("-print mem",solver);
	lis_solver_set_optionC(solver);

	err = lis_solve(A,b,x,solver);

	CHKERR(err);
	lis_solver_get_iterex(solver,&iter,&iter_double,&iter_quad);
	lis_solver_get_timeex(solver,&time,&itime,&ptime,&p_c_time,&p_i_time);
	lis_solver_get_residualnorm(solver,&resid);
	lis_solver_get_solver(solver,&nsol);
	lis_solver_get_solvername(nsol,solvername);


	/* write results */
	if( my_rank==0 )
	{
#ifdef _LONG__LONG
#ifdef _LONG__DOUBLE
		printf("%s: number of iterations = %lld \n",solvername, iter);
#else
		printf("%s: number of iterations = %lld (double = %lld, quad = %lld)\n",solvername,iter, iter_double, iter_quad);
#endif
#else
#ifdef _LONG__DOUBLE
		printf("%s: number of iterations = %d \n",solvername, iter);
#else
		printf("%s: number of iterations = %d (double = %d, quad = %d)\n",solvername,iter, iter_double, iter_quad);
#endif
#endif
		printf("%s: elapsed time         = %e sec.\n",solvername,time);
		printf("%s:   preconditioner     = %e sec.\n",solvername, ptime);
		printf("%s:     matrix creation  = %e sec.\n",solvername, p_c_time);
		printf("%s:   linear solver      = %e sec.\n",solvername, itime);
#ifdef _LONG__DOUBLE
		printf("%s: relative residual    = %Le\n\n",solvername,resid);
#else
		printf("%s: relative residual    = %e\n\n",solvername,resid);
#endif
	}

	/* write solution */
	lis_output_vector(x,LIS_FMT_MM,argv[3]);

	/* write residual history */
	lis_solver_output_rhistory(solver, argv[4]);

	lis_solver_destroy(solver);
	lis_vector_destroy(x);
	lis_vector_destroy(u);
	lis_vector_destroy(b);
	lis_matrix_destroy(A);

	lis_finalize();

	LIS_DEBUG_FUNC_OUT;

	return 0;
}
// ============================================================
// Entry point of the prime sieve driver.
//
// Usage: <program> [-j <threads>] [max]
//   -j <threads>  number of threads to use; negative values become 0 and
//                 values above the processor count are clamped to it
//   max           upper bound handed to AllocArray/BuildPrimes
//                 (defaults to 10,000,000 when omitted)
//
// Option parsing stops at the first argument that does not start with '-'.
// Waits for a key press (getchar) before printing the primes.
int main( const int nArg, const char *aArg[] )
{
// BEGIN OMP
    gnThreadsMaximum = omp_get_num_procs();
// END OMP

    int iArg = 1;
    for( iArg = 1; iArg < nArg; iArg++ )
    {
        if (aArg[ iArg ][0] == '-' )
        {
            if (aArg[iArg][1] == 'j')
            {
                iArg++;
                // "-j" must be followed by a value: aArg[ nArg ] is the NULL
                // terminator of the argument vector, so iArg == nArg is
                // already out of range.
                if (iArg >= nArg)
                    return printf( "Invalid # of threads to use.\n" );
                gnThreadsActive = atoi( aArg[ iArg ] );
                if (gnThreadsActive < 0)
                    gnThreadsActive = 0;
                if (gnThreadsActive > gnThreadsMaximum)
                    gnThreadsActive = gnThreadsMaximum;
            }
        }
        else
            break;
    }

    // First non-option argument is the upper bound; otherwise use the default.
    // The commented-out alternatives record the prime count, the largest prime
    // and measured timings for other bounds.
    prime_t max = (nArg > iArg)
        ? (prime_t) atou( aArg[ iArg ] )
//      :          6; // Test 6i+1>max && isprime(6i+1)==true
//      :         32; // Test 8 core
//      :         64; // Test 8 core
//      :        255; // 2^8 Test 8 core
//      :        256; //10^3 Test 8 core [54] = 251 // Largest 8-bit prime

//      :        100; //10^2    [             25] =            97 // 25 primes between 1 and 100
//      :       1000; //10^3    [            168] =           997
//             10000; //10^4    [          1,229] =         9,973 //
//      :      65536; // 2^16   [          6,542] =        65,521 // x86: 00:00:00.001, x64: 00:00:00.000  Primes/Sec: 64,000,000 K#/s Largest 16-bit prime
//      :     100000; //10^5    [          9,592] =        99,991 // x86: 00:00:00.001, x64: 00:00:00.000  Primes/Sec: 97,000,000 K#/s
//      :     611953; //        [         50,000] =       611,953 // x86: 00:00:00.002, x64: 00:00:00.002  Primes/Sec: 298,500 K#/s    First 50,000 primes
//      :    1000000; //10^6    [         78,498] =       999,983 // x86: 00:00:00.003, x64: 00:00:00.002  Primes/Sec: 488,000 K#/s
        :   10000000; //10^7    [        664,579] =     9,999,991 // x86: 00:00:00.031, x64: 00:00:00.034  Primes/Sec: 264 M#/s
//      :   15485863; //        [      1,000,000] =    15,485,863 // x86: 00:00:00.057, x64: 00:00:00.055  Primes/Sec: 254 M#/s        First 1,000,000 primes
//      :  100000000; //10^8    [      5,761,455] =    99,999,989 // x86: 00:00:00.490, x64: 00:00:00.484  Primes/Sec: 196 M#/s
//      : 1000000000; //10^9    [     50,847,534] =   999,999,937 // x86: crash         x64: 00:00:10.590  Primes/Sec: 89 M#/s
//      : 2038074743; //        [    100,000,000] = 2,038,074,743 //                    x64: 00:00:23.130  Primes/Sec: 84 M#/s First 100,000,000 primes
//      : 2147483644; // 2^31-4 [    105,097,564] = 2,147,483,629 //                    x64: 00:00:24.502  Primes/Sec: 83 M#/s
//      : 2147483647; // 2^31-1 [               ]
//      : 2147483648; // 2^31   [    105,097,565] = 2,147,483,647 //                    x64: 00:00:43.818  Primes/Sec: 46 M#/s
//      : 4294967292; // 2^32-4 [               ]
//      : 4294967295; // 2^32-1 [               ]
//      : 4294967296; // 2^32   [    203,280,221] =
//      :10000000000; //10^10   [    455,052,511] =
//      :       1e11; //10^11   [  4,118,054,813] =
//      :       1e12; //10^12   [ 37,607,912,018] =
//      :       1e13; //10^13   [346,065,536,839] =

    AllocArray ( max );
    TimerStart ( max );
    BuildPrimes( max );
    TimerStop  ( max );
    getchar();
    PrintPrimes();
    DeleteArray();

    return 0;
}
Ejemplo n.º 17
0
/*
 * LIS eigensolver test driver.
 *
 * Command line: argv[1] is the matrix size n; further arguments are picked up
 * by lis_esolver_set_optionC.
 *
 * Builds the n x n tridiagonal matrix with 2 on the diagonal and -1 on the
 * two neighbouring diagonals (one-dimensional Poisson equation), runs
 * lis_esolve on it and prints the eigenvalue, iteration count, timings and
 * residual on rank 0.
 *
 * Returns 0 on success; CHKERR leaves the function on any reported error.
 */
LIS_INT main(LIS_INT argc, char* argv[])
{
    LIS_INT i,n,gn,is,ie;
    LIS_INT nprocs,my_rank;
    int int_nprocs,int_my_rank;
    LIS_INT nesol;
    LIS_INT err;
    LIS_MATRIX A;
    LIS_VECTOR x;
    LIS_REAL evalue0;
    LIS_ESOLVER esolver;
    LIS_REAL residual;
    LIS_INT iter;
    double time;
    double itime,ptime,p_c_time,p_i_time;
    char esolvername[128];
    
    LIS_DEBUG_FUNC_IN;

    lis_initialize(&argc, &argv);

#ifdef USE_MPI
    MPI_Comm_size(MPI_COMM_WORLD,&int_nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD,&int_my_rank);
    nprocs = int_nprocs;
    my_rank = int_my_rank;
#else
    nprocs  = 1;
    my_rank = 0;
#endif
    
    if( argc < 2 )
    {
        if( my_rank==0 )
        {
            printf("Usage: %s n [eoptions]\n", argv[0]);
        }
        CHKERR(1);
    }

    if( my_rank==0 )
    {
        printf("\n");
        /* nprocs is a LIS_INT, so the conversion depends on the build. */
#ifdef _LONG__LONG
        printf("number of processes = %lld\n",nprocs);
#else
        printf("number of processes = %d\n",nprocs);
#endif
    }

#ifdef _OPENMP
    if( my_rank==0 )
    {
        /* The OpenMP queries return plain int whatever LIS_INT is, so %d is
           the matching conversion in every build configuration. */
        printf("max number of threads = %d\n",omp_get_num_procs());
        printf("number of threads = %d\n",omp_get_max_threads());
    }
#endif

    /* generate coefficient matrix for one dimensional Poisson equation */
    n = atoi(argv[1]);
    err = lis_matrix_create(LIS_COMM_WORLD,&A); CHKERR(err);
    err = lis_matrix_set_size(A,0,n); CHKERR(err);
    lis_matrix_get_size(A,&n,&gn);
    lis_matrix_get_range(A,&is,&ie);
    /* fill the rows [is, ie) reported by lis_matrix_get_range */
    for(i=is;i<ie;i++)
    {
        if( i>0   )  lis_matrix_set_value(LIS_INS_VALUE,i,i-1,-1.0,A);
        if( i<gn-1 ) lis_matrix_set_value(LIS_INS_VALUE,i,i+1,-1.0,A);
        lis_matrix_set_value(LIS_INS_VALUE,i,i,2.0,A);
    }
    lis_matrix_set_type(A,LIS_MATRIX_CSR);
    err = lis_matrix_assemble(A); CHKERR(err);
    err = lis_vector_duplicate(A,&x); CHKERR(err);

    err = lis_esolver_create(&esolver); CHKERR(err);
    lis_esolver_set_option("-eprint mem",esolver);
    lis_esolver_set_optionC(esolver);
    lis_esolve(A, x, &evalue0, esolver);
    lis_esolver_get_esolver(esolver,&nesol);
    lis_esolver_get_esolvername(nesol,esolvername);
    lis_esolver_get_residualnorm(esolver, &residual);
    lis_esolver_get_iter(esolver, &iter);
    lis_esolver_get_timeex(esolver,&time,&itime,&ptime,&p_c_time,&p_i_time);
    if( my_rank==0 ) {
        printf("%s: mode number          = %d\n", esolvername, 0);
#ifdef _LONG__DOUBLE
        printf("%s: eigenvalue           = %Le\n", esolvername, evalue0);
#else
        printf("%s: eigenvalue           = %e\n", esolvername, evalue0);
#endif
#ifdef _LONG__LONG
        printf("%s: number of iterations = %lld\n",esolvername, iter);
#else
        printf("%s: number of iterations = %d\n",esolvername, iter);
#endif
        printf("%s: elapsed time         = %e sec.\n", esolvername, time);
        printf("%s:   preconditioner     = %e sec.\n", esolvername, ptime);
        printf("%s:     matrix creation  = %e sec.\n", esolvername, p_c_time);
        printf("%s:   linear solver      = %e sec.\n", esolvername, itime);
#ifdef _LONG__DOUBLE
        printf("%s: relative residual    = %Le\n\n",esolvername, residual);
#else
        printf("%s: relative residual    = %e\n\n",esolvername, residual);
#endif
    }

    /*
    lis_vector_nrm2(x, &xnrm2);
    lis_vector_scale((1/xnrm2*sqrt(n)), x);
    lis_vector_print(x);
    */

    /*
    lis_vector_create(LIS_COMM_WORLD,&y);
    lis_matrix_create(LIS_COMM_WORLD,&B);
    lis_esolver_get_evalues(esolver,y);
    lis_esolver_get_evectors(esolver,B);
    lis_output_vector(y,LIS_FMT_MM,"evalues.out");
    lis_output_matrix(B,LIS_FMT_MM,"evectors.out");
    lis_vector_destroy(y);
    lis_matrix_destroy(B);
    */

    lis_esolver_destroy(esolver);
    lis_matrix_destroy(A);
    lis_vector_destroy(x);

    lis_finalize();

    LIS_DEBUG_FUNC_OUT;

    return 0;
}
Ejemplo n.º 18
0
// using namespace std;
int _tmain(int argc, _TCHAR* argv[])
{
	wcout << L"StatisGA:" << endl;
// 	setlocale( LC_ALL, "chs" );
// 	wstring test = L"ищ";
	// 	wcout << test<<endl;

	wstring InitialParaFile = L"Para.txt";
	wstring OutpuParaFile = L"Para.txt";
	
	bool bMode = false;
 	wcout << L"Choose mode: 0 for Train(default); 1 for test" << endl;
 	wcin >> bMode;
	
	NumCharact NCharact;

	wstring InputDataFile = L"399300";
	//if (true == bMode)
	//{
	//	wcout << L"Input data file name:";
	//	wcin >> InputDataFile;
	//}	
	//wchar_t CurrentPath[1024];
	//GetCurrentDirectory(1024, CurrentPath);

	vector<vector<double>> OriginalDataArray;
	vector<wstring> DateArray;
	ReadDataFile(DateArray, OriginalDataArray, InputDataFile);
	
	int DataStart = 150;
	double StartValue = 0;
	if (true == bMode)
	{
		DataStart = 0;
	//	wcout << L"Input Start value:";
	//	wcin >> StartValue;
	}
	int CyclePara = 1;
	NCharact.DataPreprocess(CyclePara, DataStart, StartValue, OriginalDataArray);

	NCharact.TrainFactorLength = 30;
	NCharact.FactorNumber = 14;
	NCharact.CheckSize = 2; 
	const int CalcuNumberK = 6; 

	NCharact.FactorRange = 10901;
	const int TolerantRange = 250; 
	const int TolerantNumber = 2;

	__int64 GANumber = 1000100000;//
	
	wstringstream FileNameStream;
	FileNameStream << InputDataFile << L"_" << NCharact.FactorNumber
				   << L"_" << TolerantRange << L"_" << TolerantNumber << CyclePara << L".txt";//<< L"750.txt";//
	InitialParaFile = FileNameStream.str();
	OutpuParaFile = FileNameStream.str();
	FileNameStream.clear();

	NCharact.InitTrainArray(CalcuNumberK, TolerantRange, TolerantNumber, InitialParaFile);
	
	if (false == bMode)
	{
		int NThreads = omp_get_num_procs() - 1;
		int NOut = 100000;
		unsigned __int64 nn = 0;
		vector<vector<int>> SingleTrainArray;		
		#ifndef _DEBUG
			#pragma omp parallel for num_threads(NThreads) firstprivate(SingleTrainArray) //, TargetPointResultArray)
		#endif
		for (__int64 n = __int64(GANumber * 0.0); n < GANumber; n++)
		{
			double NPercent = double(nn) / GANumber;
			int TrainFactorIndex = n % NCharact.TrainFactorLength;
 			NCharact.GAFunction(NPercent, TrainFactorIndex, SingleTrainArray, CalcuNumberK);

			vector<double> JudgeElement(2, 0.0);
			NCharact.ANOVA2(CalcuNumberK, TolerantRange, TolerantNumber, SingleTrainArray, JudgeElement);

			#pragma omp critical
			{
				NCharact.GAReprocess(NPercent, TrainFactorIndex, SingleTrainArray, JudgeElement);

				if(0 == nn % NOut)
				{
					wcout << nn << L" ";
					NCharact.OutputParaFile(OutpuParaFile);
				}
				nn++;
			}
		}
	}
	else
	{
		NCharact.TestLatestData(DateArray, TolerantRange, CalcuNumberK, 450);
	}

	/*
	const int HistogOxLength = 20;
	const int HistogDataLength = 20;
	const int HistogSize = 4;
	NCharact.HistogCoOx.insert(NCharact.HistogCoOx.end(), HistogOxLength, 0.0);
	vector<int> HistogTemp(HistogDataLength, 0);
	NCharact.HistogData.insert(NCharact.HistogData.end(), HistogSize, HistogTemp);
	NCharact.GenerateHistogDistr(CheckSize, TargetPointResultArray);
	*/

	OriginalDataArray.clear();
	DateArray.clear();
	NCharact.DataClear();

	string tempstring;
	cout << endl << "end.";
	cin >> tempstring;
	return 0;	
}
Ejemplo n.º 19
0
/*
 * Open the library: store the integration settings, create one device
 * context per OpenMP thread, load the compute kernels on every device and
 * allocate the initial particle buffers.
 *
 * kernelFile  file name handed to loadComputeKernels() on every device
 * devices     device ids to use; only read when nprocs > 0, in which case
 *             entry [tid] is read for every thread of the parallel region
 * nprocs      > 0 : use the ids listed in `devices`
 *             = 0 : use every detected device, device id == thread id
 *             < 0 : use |nprocs| devices, -1 is passed to assignDevice()
 * order       integration order     (0=GRAPE5, 1=4th, 2=6th, 3=8th)
 * precision   integration precision (0=FLOAT, 1=DOUBLESINGLE, 2=DOUBLE)
 *
 * Returns 0 on success, -1 when no device can be opened (nothing detected
 * while nprocs == 0, or nprocs > 0 combined with a null device list).
 */
int sapporo::open(std::string kernelFile, int *devices,
                  int nprocs, int order, int precision)
{
  //Set the integration order
  integrationOrder      = order;
  integrationPrecision  = precision;

  cout << "Integration order used: " << integrationOrder << " (0=GRAPE5, 1=4th, 2=6th, 3=8th)\n";
  cout << "Integration precision used: " << precision << " (0=FLOAT, 1 = DOUBLESINGLE, 2=DOUBLE)\n";

  dev::context        contextTest;  //Only used to retrieve the number of devices

  int numDev = 0;


  #ifdef __OPENCL_DEV__
    numDev = contextTest.getDeviceCount(CL_DEVICE_TYPE_GPU, 0);
  #else
    numDev = contextTest.getDeviceCount();
  #endif

  cout << "Number of cpus available: " << omp_get_num_procs() << endl;
  cout << "Number of gpus available: " << numDev << endl;

  // create as many CPU threads as there are CUDA devices and create the contexts

  int numThread = abs(nprocs);

  if(numThread == 0)    //Use as many as available
  {
    numThread = numDev;
  }

  //The num_threads() clause below needs a strictly positive value, so bail
  //out instead of entering the parallel region when there is nothing to open
  if(numThread <= 0)
  {
    std::cerr << "sapporo::open: no usable compute device found" << std::endl;
    return -1;
  }

  //An explicit device list was requested, it has to be there
  if(nprocs > 0 && !devices)
  {
    std::cerr << "sapporo::open: nprocs > 0 but no device list was given" << std::endl;
    return -1;
  }

  //Value-initialise the table: slots of threads that the OpenMP runtime does
  //not deliver stay null instead of holding indeterminate pointers
  deviceList = new sapporo2::device*[numThread]();

  numberOfGPUUsedBySapporo = numThread;

//   omp_set_num_threads(numThread);
  #pragma omp parallel num_threads(numberOfGPUUsedBySapporo)
  {
    //Create context for each thread
    //NOTE(review): sapdevice is assigned by every thread here; presumably it
    //is thread-private storage declared elsewhere -- confirm
    unsigned int tid      = omp_get_thread_num();
    sapdevice             = new sapporo2::device();

    deviceList[tid] = sapdevice;

    //Let the driver try to get a device if nprocs < 0
    //Use 1...N if nprocs == 0
    //Use list if nprocs > 0
    int dev = -1;

    if(nprocs == 0) //Device ID is thread ID
    {
      dev = tid;
    }
    else if(nprocs > 0)
    {
      dev = devices[tid]; //The user gave us a set of device ids
    }

    //Assign the device and load the kernels
    sapdevice->assignDevice(dev, integrationOrder);
    const char *gravityKernel = get_kernelName(integrationOrder, precision,sapdevice->sharedMemPerThread);
    sapdevice->loadComputeKernels(kernelFile.c_str(), gravityKernel);

    //Record how many threads (and therefore devices) were really started
    if(tid == 0)
    {
      nCUDAdevices = omp_get_num_threads();
    }

    //Allocate initial memory for 16k particles per device
    sapdevice->allocateMemory(16384, get_n_pipes());
    nj_max = 16384;
  }//end pragma omp parallel

  //Used to store j-memory particle counters
  jCopyInformation.resize(nCUDAdevices);


  CPUThreshold = -1; //By Default GPU is always used


  //Disabled CPU-versus-GPU timing benchmark; it is compiled out and kept
  //only for reference
#if 0
  #ifdef CPU_SUPPORT
  
    const int nMaxTest = 2049;
    const int nMaxLoop = 2049;
    const int nIncrease = 16;
    //At the start of the program figure out at which point the GPU will be faster
    //than the host CPU. This can either be based on ni, nj, or on a combination 
    //of ni*nj = #interactions. Then if #interactions < GPUOptimal do host compute
    //otherwise do GPU compute. Stored in CPUThreshold
  
    //#pragma omp parallel
    {
      //First fill the ids with valid info otherwise testing might fail, if all ids are 0
      for(int i=0; i < nMaxTest; i++) 
      {
        if(i < NPIPES)
        {
          sapdevice->id_i[i]    = i;
          sapdevice->pos_i[i].x =  (1.0 - 2.0*drand48());
          sapdevice->pos_i[i].y =  (1.0 - 2.0*drand48());
          sapdevice->pos_i[i].z =  (1.0 - 2.0*drand48());
          sapdevice->pos_i[i].w =  1./1024;
          sapdevice->vel_i[i].x =  drand48() * 0.1;
          sapdevice->vel_i[i].y =  drand48() * 0.1;
          sapdevice->vel_i[i].z =  drand48() * 0.1;
        }
  
        if(i < nj_max)
        {
          sapdevice->id_j[i]     = i;
          sapdevice->pPos_j[i].x =  (1.0 - 2.0*drand48());
          sapdevice->pPos_j[i].y =  (1.0 - 2.0*drand48());
          sapdevice->pPos_j[i].z =  (1.0 - 2.0*drand48());
          sapdevice->pPos_j[i].w =  1./1024;
          sapdevice->pVel_j[i].x =  drand48() * 0.1;
          sapdevice->pVel_j[i].y =  drand48() * 0.1;
          sapdevice->pVel_j[i].z =  drand48() * 0.1;          
        }
      }
      
      
      //Some temp buffers, are being used multiple
      //times and contain only bogus data      
      double (*pos)[3]  = new double[nMaxTest][3];
      double (*vel)[3]  = new double[nMaxTest][3];
      double (*acc)[3]  = new double[nMaxTest][3];
      double (*jrk)[3]  = new double[nMaxTest][3];
      double  *tempBuff = new double[nMaxTest];
      
      double *timingMatrixGPU = new double[nMaxTest*nMaxTest];
      double *timingMatrixCPU = new double[nMaxTest*nMaxTest];
      
      //First call to initialize device
      evaluate_gravity(1, 1);            
      retrieve_i_particle_results(1);
      
      double tTime = 0;
      CPUThreshold = -1; //Negative to force GPU timings
      for(int k=0; k < nMaxLoop; k+=nIncrease) //number of i-particles
      {
        for(int m=0; m < nMaxLoop; m+=nIncrease) //number of j-particles
        {
          int kk=k, mm=m;
          if(k==0) kk = 1;
          if(m==0) mm = 1;
          
          timingMatrixGPU[m*nMaxTest+k] = 0;
          for(int n=0; n < 10; n++)
          {
            double t0 = get_time();
            set_time(tTime);//set time
            startGravCalc(mm,kk,
                          &sapdevice->id_i[0], pos,
                          vel,acc, acc,tempBuff,
                          1./ nMaxTest, tempBuff, NULL);
            getGravResults(mm,kk,
                          &sapdevice->id_i[0], pos,
                          vel, 1./ nMaxTest, NULL,
                          acc, jrk, acc, jrk, tempBuff,
                          NULL, tempBuff, false);
//             fprintf(stderr, "TEST DEV: Took: nj: %d  ni: %d \t %g\n", m, k,   get_time() - t0);
            
            timingMatrixGPU[m*nMaxTest+k] += get_time() - t0;
            tTime += 0.0001;
          }//for n
        }//for m
      }//for k
      
#if 0
      CPUThreshold = 10e10; //Huge to force CPU timings
      //First call outside loop, to boot-up openMP
      evaluate_gravity_host(1, 1);  
      evaluate_gravity_host_vector(1, 1);  
      
      tTime = 0;
      for(int k=1; k < nMaxLoop; k+=nIncrease)
      {
        for(int m=1; m < nMaxLoop; m+=nIncrease)
        {
          timingMatrixCPU[m*nMaxTest+k] = 0;
          for(int n=0; n < 10; n++)
          {         
            int kk=k, mm=m;
            if(k==0) kk = 1;
            if(m==0) mm = 1;            
            double t0 = get_time();
            set_time(tTime);//set time
            startGravCalc(mm,kk,
                          &sapdevice->id_i[0], pos,
                          vel,acc, acc,tempBuff,
                          1./ nMaxTest, tempBuff, NULL);
            getGravResults(mm,kk,
                          &sapdevice->id_i[0], pos,
                          vel, 1./ nMaxTest, NULL,
                          acc, jrk, acc, jrk, tempBuff,
                          NULL, tempBuff, false);
//             fprintf(stderr, "TEST CPU: Took: nj: %d  ni: %d \t %g\n", m, k,   get_time() - t0);
            timingMatrixCPU[m*nMaxTest+k] += get_time() - t0;
            tTime += 0.0001;  
          }//for n
        }//for m
      } //for k
    #endif  
    
    //Write timing data to file
    FILE *foutT = fopen("data.txt","w");
      //Print timing results GPU
      fprintf(stderr, "GPU timings:\nni");
      fprintf(foutT, "ni");
      for(int i=1; i < nMaxLoop; i+=nIncrease)
        fprintf(foutT, "\t%d", i);      
      fprintf(foutT, "\n");
      fprintf(foutT, "nj\n");
      
      for(int j=1; j < nMaxLoop; j+=nIncrease)
      {
        fprintf(foutT, "%d\t", j);
        for(int i=1; i < nMaxLoop; i+=nIncrease)
        {
          fprintf(foutT, "%f\t", timingMatrixGPU[j*nMaxTest+i]);
        }
        fprintf(foutT, "\n");
      }
      fclose(foutT);
      exit(0);
      fprintf(stderr, "\nCPU timings:\nni");
      for(int i=1; i < nMaxLoop; i+=nIncrease)
        fprintf(stderr, "\t%d", i);      
      fprintf(stderr, "\n");
      fprintf(stderr, "nj\n");
      
      for(int j=1; j < nMaxLoop; j+=nIncrease)
      {
        fprintf(stderr, "%d\t", j);
        for(int i=1; i < nMaxLoop; i+=nIncrease)
        {
          fprintf(stderr, "%f\t", timingMatrixCPU[j*nMaxTest+i]);
        }
        fprintf(stderr, "\n");
      }      
      
      
      fprintf(stderr, "GPU timings:\n");
           
      for(int j=1; j < nMaxLoop; j+=nIncrease)
      {        
        for(int i=1; i < nMaxLoop; i+=nIncrease)
        {
//           fprintf(stderr,"%f\t%f\t%f\t%f\n",
            fprintf(stderr,"%d\t%f\t%f\n", 
                  i*j,  timingMatrixGPU[j*nMaxTest+i],
                   timingMatrixCPU[j*nMaxTest+i]);
                  
//                   j / timingMatrixGPU[j*nMaxTest+i],
//                   i / timingMatrixGPU[j*nMaxTest+i],
//                   j / timingMatrixCPU[j*nMaxTest+i],
//                   i / timingMatrixCPU[j*nMaxTest+i]);          
        }//for i        
        fprintf(stderr, "\n");
      } //for j
      
      
      //TODO set some interaction count number that is the break-even point 
      //between CPU and GPU computations
      delete[] pos;
      delete[] vel;
      delete[] acc;
      delete[] jrk;
      delete[] tempBuff;
      delete[] timingMatrixGPU;
      delete[] timingMatrixCPU;      
    }
    
    
    exit(0);
  #endif //ifdef CPU support
#endif

  return 0;
}
Ejemplo n.º 20
0
/* Heap-duplicate a string for read_CLI so that Inputs.HM always points at
 * malloc'd storage, whether it holds the default or a user-supplied value.
 * Terminates the program if the allocation fails. */
static char * read_CLI_dup( const char * src )
{
	size_t size = strlen(src) + 1;
	char * copy = (char *) malloc( size );
	if( copy == NULL )
		exit(EXIT_FAILURE);
	memcpy(copy, src, size);
	return copy;
}

/* Build an Inputs struct from the command line.
 *
 * Recognised options:
 *   -t <n>     number of threads   (default: omp_get_num_procs())
 *   -g <n>     number of gridpoints (default: 11303)
 *   -l <n>     number of lookups    (default: 15000000)
 *   -s <size>  problem size: small, large, XL or XXL, case-insensitive
 *              (default: "small")
 *   -v         save grids (takes precedence over -r)
 *   -r         restore grids
 *   -f <name>  grid file name       (default: "grids")
 *   -k <n>, -c only when built with __USE_AMD_OCL__: tloops / run_cpu
 *
 * Any unknown option, missing option value or out-of-range value is
 * reported through print_CLI_error().
 *
 * The returned struct's HM member is always heap-allocated.
 */
Inputs read_CLI( int argc, char * argv[] )
{
	Inputs input;

	memset(&input, 0, sizeof(Inputs));

	// defaults to max threads on the system
	input.nthreads = omp_get_num_procs();

	// defaults to 355 (H-M Large); replaced by 68 below for the small size
	input.n_isotopes = 355;

	// defaults to 11303 (corresponding to H-M Large benchmark)
	input.n_gridpoints = 11303;

	// defaults to 15,000,000
	input.lookups = 15000000;

	// defaults to the H-M small problem size
	input.HM = read_CLI_dup("small");

#ifdef __USE_AMD_OCL__
	input.tloops = 1;
	input.run_cpu = false;
#endif
	input.savegrids = false;
	input.restoregrids = false;
	strcpy(input.file_name, "grids");

	// Set when the user passes -g, so the size presets do not override it
	int user_g = 0;

	// Collect Raw Input
	for( int i = 1; i < argc; i++ )
	{
		char * arg = argv[i];

		// nthreads (-t)
		if( strcmp(arg, "-t") == 0 )
		{
			if( ++i < argc )
				input.nthreads = atoi(argv[i]);
			else
				print_CLI_error();
		}
		// n_gridpoints (-g)
		else if( strcmp(arg, "-g") == 0 )
		{
			if( ++i < argc )
			{
				user_g = 1;
				input.n_gridpoints = atoi(argv[i]);
			}
			else
				print_CLI_error();
		}
		// lookups (-l)
		else if( strcmp(arg, "-l") == 0 )
		{
			if( ++i < argc )
				input.lookups = atoi(argv[i]);
			else
				print_CLI_error();
		}
		// HM (-s)
		else if( strcmp(arg, "-s") == 0 )
		{
			if( ++i < argc )
			{
				// release the previous value (default or an earlier -s)
				// and keep a private copy instead of aliasing argv
				free(input.HM);
				input.HM = read_CLI_dup(argv[i]);
			}
			else
				print_CLI_error();
		}
#ifdef __USE_AMD_OCL__
		else if( strcmp(arg, "-k") == 0 )
		{
			if( ++i < argc )
				input.tloops = atoi(argv[i]);
			else
				print_CLI_error();
		}
		else if( strcmp(arg, "-c") == 0 )
		{
			input.run_cpu = true;
		}

#endif
		else if( strcmp(arg, "-v") == 0 )
		{
			input.savegrids = true;
		}
		else if( strcmp(arg, "-r") == 0 )
		{
			input.restoregrids = true;
		}
		else if( strcmp(arg, "-f") == 0 )
		{
			// reject names that do not fit rather than overflow the buffer
			// (assumes file_name is a fixed-size char array member -- the
			// unconditional strcpy of "grids" above relies on that as well)
			if( ++i < argc && strlen(argv[i]) < sizeof(input.file_name) )
				strcpy(input.file_name, argv[i]);
			else
				print_CLI_error();
		}
		else
			print_CLI_error();
	}

	// saving wins when both -v and -r were given
	if( input.savegrids )
		input.restoregrids = false;

	// Validate Input

	// Validate nthreads
	if( input.nthreads < 1 )
		print_CLI_error();

	// Validate n_isotopes
	if( input.n_isotopes < 1 )
		print_CLI_error();

	// Validate n_gridpoints
	if( input.n_gridpoints < 1 )
		print_CLI_error();

	// Validate lookups
	if( input.lookups < 1 )
		print_CLI_error();

	// Validate HM size
	if( strcasecmp(input.HM, "small") != 0 &&
		strcasecmp(input.HM, "large") != 0 &&
		strcasecmp(input.HM, "XL") != 0 &&
		strcasecmp(input.HM, "XXL") != 0 )
		print_CLI_error();

	// Set HM size specific parameters
	// ("large" keeps the defaults assigned at the top)
	if( strcasecmp(input.HM, "small") == 0 )
		input.n_isotopes = 68;
	else if( strcasecmp(input.HM, "XL") == 0 && user_g == 0 )
		input.n_gridpoints = 238847; // sized to make 120 GB XS data
	else if( strcasecmp(input.HM, "XXL") == 0 && user_g == 0 )
		input.n_gridpoints = 238847 * 2.1; // 252 GB XS data

	// Return input struct
	return input;
}
Ejemplo n.º 21
0
/**
 * Initialise the global `darktable` state.
 *
 * In order, the function: installs signal handlers and checks for SSE2,
 * makes sure DARKTABLE_SHAREDIR is part of XDG_DATA_DIRS, sets up gettext,
 * zeroes the `darktable` struct, parses darktable's own command line options
 * (setting the consumed argv slots to NULL and compacting argv afterwards),
 * initialises directories, configuration, language, optionally gtk, the
 * database and all remaining subsystems, and finally loads images named on
 * the command line when a gui is requested.
 *
 * @param argc      number of entries in argv
 * @param argv      command line; recognised options are removed in place
 * @param init_gui  when TRUE the gui related parts (dbus, gtk, control,
 *                  libs, key bindings, view switching, image loading) are
 *                  initialised as well
 * @param load_data forwarded to dt_database_init()
 * @param L         lua state forwarded to dt_lua_init_early() when built
 *                  with USE_LUA
 * @return 0 on success. Non-zero when startup is aborted: 1 after printing
 *         the version, on database, gui or view manager failure, or the
 *         value returned by usage() for help requests and invalid options.
 */
int dt_init(int argc, char *argv[], const gboolean init_gui, const gboolean load_data, lua_State *L)
{
  double start_wtime = dt_get_wtime();

#ifndef __WIN32__
  if(getuid() == 0 || geteuid() == 0)
    printf(
        "WARNING: either your user id or the effective user id are 0. are you running darktable as root?\n");
#endif

#if defined(__SSE__)
  // make everything go a lot faster.
  _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
#endif

  dt_set_signal_handlers();

#include "is_supported_platform.h"

  int sse2_supported = 0;

#ifdef HAVE_BUILTIN_CPU_SUPPORTS
  // NOTE: _may_i_use_cpu_feature() looks better, but only avaliable in ICC
  __builtin_cpu_init();
  sse2_supported = __builtin_cpu_supports("sse2");
#else
  sse2_supported = dt_detect_cpu_features() & CPU_FLAG_SSE2;
#endif
  if(!sse2_supported)
  {
    fprintf(stderr, "[dt_init] SSE2 instruction set is unavailable.\n");
    fprintf(stderr, "[dt_init] expect a LOT of functionality to be broken. you have been warned.\n");
  }

#ifdef M_MMAP_THRESHOLD
  mallopt(M_MMAP_THRESHOLD, 128 * 1024); /* use mmap() for large allocations */
#endif

  // make sure that stack/frame limits are good (musl)
  dt_set_rlimits();

  // we have to have our share dir in XDG_DATA_DIRS,
  // otherwise GTK+ won't find our logo for the about screen (and maybe other things)
  {
    const gchar *xdg_data_dirs = g_getenv("XDG_DATA_DIRS");
    gchar *new_xdg_data_dirs = NULL;
    gboolean set_env = TRUE;
    if(xdg_data_dirs != NULL && *xdg_data_dirs != '\0')
    {
      // check if DARKTABLE_SHAREDIR is already in there
      gboolean found = FALSE;
      gchar **tokens = g_strsplit(xdg_data_dirs, G_SEARCHPATH_SEPARATOR_S, 0);
      // xdg_data_dirs is neither NULL nor empty => tokens != NULL
      for(char **iter = tokens; *iter != NULL; iter++)
        if(!strcmp(DARKTABLE_SHAREDIR, *iter))
        {
          found = TRUE;
          break;
        }
      g_strfreev(tokens);
      if(found)
        set_env = FALSE;
      else
        new_xdg_data_dirs = g_strjoin(G_SEARCHPATH_SEPARATOR_S, DARKTABLE_SHAREDIR, xdg_data_dirs, NULL);
    }
    else
    {
#ifndef _WIN32
      // see http://standards.freedesktop.org/basedir-spec/latest/ar01s03.html for a reason to use those as a
      // default
      if(!g_strcmp0(DARKTABLE_SHAREDIR, "/usr/local/share")
         || !g_strcmp0(DARKTABLE_SHAREDIR, "/usr/local/share/")
         || !g_strcmp0(DARKTABLE_SHAREDIR, "/usr/share") || !g_strcmp0(DARKTABLE_SHAREDIR, "/usr/share/"))
        new_xdg_data_dirs = g_strdup("/usr/local/share/" G_SEARCHPATH_SEPARATOR_S "/usr/share/");
      else
        new_xdg_data_dirs = g_strdup_printf("%s" G_SEARCHPATH_SEPARATOR_S "/usr/local/share/" G_SEARCHPATH_SEPARATOR_S
                                            "/usr/share/", DARKTABLE_SHAREDIR);
#else
      set_env = FALSE;
#endif
    }

    if(set_env) g_setenv("XDG_DATA_DIRS", new_xdg_data_dirs, 1);
    g_free(new_xdg_data_dirs);
  }

  setlocale(LC_ALL, "");
  bindtextdomain(GETTEXT_PACKAGE, DARKTABLE_LOCALEDIR);
  bind_textdomain_codeset(GETTEXT_PACKAGE, "UTF-8");
  textdomain(GETTEXT_PACKAGE);

  // init all pointers to 0:
  memset(&darktable, 0, sizeof(darktable_t));

  darktable.start_wtime = start_wtime;

  darktable.progname = argv[0];

  // FIXME: move there into dt_database_t
  dt_pthread_mutex_init(&(darktable.db_insert), NULL);
  dt_pthread_mutex_init(&(darktable.plugin_threadsafe), NULL);
  dt_pthread_mutex_init(&(darktable.capabilities_threadsafe), NULL);
  darktable.control = (dt_control_t *)calloc(1, sizeof(dt_control_t));

  // database
  char *dbfilename_from_command = NULL;
  char *noiseprofiles_from_command = NULL;
  char *datadir_from_command = NULL;
  char *moduledir_from_command = NULL;
  char *tmpdir_from_command = NULL;
  char *configdir_from_command = NULL;
  char *cachedir_from_command = NULL;

#ifdef HAVE_OPENCL
  gboolean exclude_opencl = FALSE;
  gboolean print_statistics = strcmp(argv[0], "darktable-cltest");
#endif

#ifdef USE_LUA
  char *lua_command = NULL;
#endif

  darktable.num_openmp_threads = 1;
#ifdef _OPENMP
  darktable.num_openmp_threads = omp_get_num_procs();
#endif
  darktable.unmuted = 0;
  GSList *config_override = NULL;
  // every option consumed below sets its argv slot(s) to NULL; the NULLs are
  // squeezed out of argv after this loop
  for(int k = 1; k < argc; k++)
  {
    if(argv[k][0] == '-')
    {
      if(!strcmp(argv[k], "--help") || !strcmp(argv[k], "-h"))
      {
        return usage(argv[0]);
      }
      else if(!strcmp(argv[k], "--version"))
      {
#ifdef USE_LUA
        const char *lua_api_version = strcmp(LUA_API_VERSION_SUFFIX, "") ?
                                      STR(LUA_API_VERSION_MAJOR) "."
                                      STR(LUA_API_VERSION_MINOR) "."
                                      STR(LUA_API_VERSION_PATCH) "-"
                                      LUA_API_VERSION_SUFFIX :
                                      STR(LUA_API_VERSION_MAJOR) "."
                                      STR(LUA_API_VERSION_MINOR) "."
                                      STR(LUA_API_VERSION_PATCH);
#endif
        printf("this is %s\ncopyright (c) 2009-%s johannes hanika\n" PACKAGE_BUGREPORT "\n\ncompile options:\n"
               "  bit depth is %s\n"
#ifdef _DEBUG
               "  debug build\n"
#else
               "  normal build\n"
#endif
#if defined(__SSE2__) && defined(__SSE__)
               "  SSE2 optimized codepath enabled\n"
#else
               "  SSE2 optimized codepath disabled\n"
#endif
#ifdef _OPENMP
               "  OpenMP support enabled\n"
#else
               "  OpenMP support disabled\n"
#endif

#ifdef HAVE_OPENCL
               "  OpenCL support enabled\n"
#else
               "  OpenCL support disabled\n"
#endif

#ifdef USE_LUA
               "  Lua support enabled, API version %s\n"
#else
               "  Lua support disabled\n"
#endif

#ifdef USE_COLORDGTK
               "  Colord support enabled\n"
#else
               "  Colord support disabled\n"
#endif

#ifdef HAVE_GPHOTO2
               "  gPhoto2 support enabled\n"
#else
               "  gPhoto2 support disabled\n"
#endif

#ifdef HAVE_GRAPHICSMAGICK
               "  GraphicsMagick support enabled\n"
#else
               "  GraphicsMagick support disabled\n"
#endif

#ifdef HAVE_OPENEXR
               "  OpenEXR support enabled\n"
#else
               "  OpenEXR support disabled\n"
#endif
               ,
               darktable_package_string,
               darktable_last_commit_year,
               (sizeof(void *) == 8 ? "64 bit" : sizeof(void *) == 4 ? "32 bit" : "unknown")
// must use the same test as the "%s" line in the format string above,
// otherwise format and argument list can get out of sync
#ifdef USE_LUA
                   ,
               lua_api_version
#endif
               );
        return 1;
      }
      else if(!strcmp(argv[k], "--library") && argc > k + 1)
      {
        dbfilename_from_command = argv[++k];
        argv[k-1] = NULL;
        argv[k] = NULL;
      }
      else if(!strcmp(argv[k], "--datadir") && argc > k + 1)
      {
        datadir_from_command = argv[++k];
        argv[k-1] = NULL;
        argv[k] = NULL;
      }
      else if(!strcmp(argv[k], "--moduledir") && argc > k + 1)
      {
        moduledir_from_command = argv[++k];
        argv[k-1] = NULL;
        argv[k] = NULL;
      }
      else if(!strcmp(argv[k], "--tmpdir") && argc > k + 1)
      {
        tmpdir_from_command = argv[++k];
        argv[k-1] = NULL;
        argv[k] = NULL;
      }
      else if(!strcmp(argv[k], "--configdir") && argc > k + 1)
      {
        configdir_from_command = argv[++k];
        argv[k-1] = NULL;
        argv[k] = NULL;
      }
      else if(!strcmp(argv[k], "--cachedir") && argc > k + 1)
      {
        cachedir_from_command = argv[++k];
        argv[k-1] = NULL;
        argv[k] = NULL;
      }
      else if(!strcmp(argv[k], "--localedir") && argc > k + 1)
      {
        bindtextdomain(GETTEXT_PACKAGE, argv[++k]);
        argv[k-1] = NULL;
        argv[k] = NULL;
      }
      else if(argv[k][1] == 'd' && argc > k + 1)
      {
        if(!strcmp(argv[k + 1], "all"))
          darktable.unmuted = 0xffffffff; // enable all debug information
        else if(!strcmp(argv[k + 1], "cache"))
          darktable.unmuted |= DT_DEBUG_CACHE; // enable debugging for lib/film/cache module
        else if(!strcmp(argv[k + 1], "control"))
          darktable.unmuted |= DT_DEBUG_CONTROL; // enable debugging for scheduler module
        else if(!strcmp(argv[k + 1], "dev"))
          darktable.unmuted |= DT_DEBUG_DEV; // develop module
        else if(!strcmp(argv[k + 1], "input"))
          darktable.unmuted |= DT_DEBUG_INPUT; // input devices
        else if(!strcmp(argv[k + 1], "camctl"))
          darktable.unmuted |= DT_DEBUG_CAMCTL; // camera control module
        else if(!strcmp(argv[k + 1], "perf"))
          darktable.unmuted |= DT_DEBUG_PERF; // performance measurements
        else if(!strcmp(argv[k + 1], "pwstorage"))
          darktable.unmuted |= DT_DEBUG_PWSTORAGE; // pwstorage module
        else if(!strcmp(argv[k + 1], "opencl"))
          darktable.unmuted |= DT_DEBUG_OPENCL; // gpu accel via opencl
        else if(!strcmp(argv[k + 1], "sql"))
          darktable.unmuted |= DT_DEBUG_SQL; // SQLite3 queries
        else if(!strcmp(argv[k + 1], "memory"))
          darktable.unmuted |= DT_DEBUG_MEMORY; // some stats on mem usage now and then.
        else if(!strcmp(argv[k + 1], "lighttable"))
          darktable.unmuted |= DT_DEBUG_LIGHTTABLE; // lighttable related stuff.
        else if(!strcmp(argv[k + 1], "nan"))
          darktable.unmuted |= DT_DEBUG_NAN; // check for NANs when processing the pipe.
        else if(!strcmp(argv[k + 1], "masks"))
          darktable.unmuted |= DT_DEBUG_MASKS; // masks related stuff.
        else if(!strcmp(argv[k + 1], "lua"))
          darktable.unmuted |= DT_DEBUG_LUA; // lua errors are reported on console
        else if(!strcmp(argv[k + 1], "print"))
          darktable.unmuted |= DT_DEBUG_PRINT; // print errors are reported on console
        else if(!strcmp(argv[k + 1], "camsupport"))
          darktable.unmuted |= DT_DEBUG_CAMERA_SUPPORT; // camera support warnings are reported on console
        else
          return usage(argv[0]);
        k++;
        argv[k-1] = NULL;
        argv[k] = NULL;
      }
      else if(argv[k][1] == 't' && argc > k + 1)
      {
        darktable.num_openmp_threads = CLAMP(atol(argv[k + 1]), 1, 100);
        printf("[dt_init] using %d threads for openmp parallel sections\n", darktable.num_openmp_threads);
        k++;
        argv[k-1] = NULL;
        argv[k] = NULL;
      }
      else if(!strcmp(argv[k], "--conf") && argc > k + 1)
      {
        // expects "key=value"; entries without '=' or with an empty value are ignored
        gchar *keyval = g_strdup(argv[++k]), *c = keyval;
        argv[k-1] = NULL;
        argv[k] = NULL;
        gchar *end = keyval + strlen(keyval);
        while(*c != '=' && c < end) c++;
        if(*c == '=' && *(c + 1) != '\0')
        {
          *c++ = '\0';
          dt_conf_string_entry_t *entry = (dt_conf_string_entry_t *)g_malloc(sizeof(dt_conf_string_entry_t));
          entry->key = g_strdup(keyval);
          entry->value = g_strdup(c);
          config_override = g_slist_append(config_override, entry);
        }
        g_free(keyval);
      }
      else if(!strcmp(argv[k], "--noiseprofiles") && argc > k + 1)
      {
        noiseprofiles_from_command = argv[++k];
        argv[k-1] = NULL;
        argv[k] = NULL;
      }
      else if(!strcmp(argv[k], "--luacmd") && argc > k + 1)
      {
#ifdef USE_LUA
        lua_command = argv[++k];
#else
        ++k;
#endif
        argv[k-1] = NULL;
        argv[k] = NULL;
      }
      else if(!strcmp(argv[k], "--disable-opencl"))
      {
#ifdef HAVE_OPENCL
        exclude_opencl = TRUE;
#endif
        argv[k] = NULL;
      }
      else if(!strcmp(argv[k], "--"))
      {
        // "--" confuses the argument parser of glib/gtk. remove it.
        argv[k] = NULL;
        break;
      }
      else
        return usage(argv[0]); // fail on unrecognized options
    }
  }

  // remove the NULLs to not confuse gtk_init() later.
  for(int i = 1; i < argc; i++)
  {
    int k;
    for(k = i; k < argc; k++)
      if(argv[k] != NULL) break;

    if(k > i)
    {
      k -= i;
      for(int j = i + k; j < argc; j++)
      {
        argv[j-k] = argv[j];
        argv[j] = NULL;
      }
      argc -= k;
    }
  }

  if(darktable.unmuted & DT_DEBUG_MEMORY)
  {
    fprintf(stderr, "[memory] at startup\n");
    dt_print_mem_usage();
  }

  if(init_gui)
  {
    // I doubt that connecting to dbus for darktable-cli makes sense
    darktable.dbus = dt_dbus_init();

    // make sure that we have no stale global progress bar visible. thus it's run as early is possible
    dt_control_progress_init(darktable.control);
  }

#ifdef _OPENMP
  omp_set_num_threads(darktable.num_openmp_threads);
#endif
  dt_loc_init_datadir(datadir_from_command);
  dt_loc_init_plugindir(moduledir_from_command);
  if(dt_loc_init_tmp_dir(tmpdir_from_command))
  {
    fprintf(stderr, "error: invalid temporary directory: %s\n", darktable.tmpdir);
    return usage(argv[0]);
  }
  dt_loc_init_user_config_dir(configdir_from_command);
  dt_loc_init_user_cache_dir(cachedir_from_command);

#ifdef USE_LUA
  dt_lua_init_early(L);
#endif

  // thread-safe init:
  dt_exif_init();
  char datadir[PATH_MAX] = { 0 };
  dt_loc_get_user_config_dir(datadir, sizeof(datadir));
  char darktablerc[PATH_MAX] = { 0 };
  snprintf(darktablerc, sizeof(darktablerc), "%s/darktablerc", datadir);

  // initialize the config backend. this needs to be done first...
  darktable.conf = (dt_conf_t *)calloc(1, sizeof(dt_conf_t));
  dt_conf_init(darktable.conf, darktablerc, config_override);
  g_slist_free_full(config_override, g_free);

  // set the interface language
  const gchar *lang = dt_conf_get_string("ui_last/gui_language");
#if defined(_WIN32)
  // get the default locale if no language preference was specified in the config file
  if(lang == NULL || lang[0] == '\0')
  {
    const wchar_t *wcLocaleName = NULL;
    wcLocaleName = dtwin_get_locale();
    if(wcLocaleName != NULL)
    {
      gchar *langLocale;
      langLocale = g_utf16_to_utf8(wcLocaleName, -1, NULL, NULL, NULL);
      if(langLocale != NULL)
      {
        g_free((gchar *)lang);
        // langLocale is already a freshly allocated string: hand it over to
        // lang (freed below) instead of duplicating it and leaking the original
        lang = langLocale;
      }
    }
  }
#endif // defined (_WIN32)

  if(lang != NULL && lang[0] != '\0')
  {
    g_setenv("LANGUAGE", lang, 1);
    if(setlocale(LC_ALL, lang) != NULL) gtk_disable_setlocale();
    setlocale(LC_MESSAGES, lang);
    g_setenv("LANG", lang, 1);
  }
  g_free((gchar *)lang);

  // we need this REALLY early so that error messages can be shown, however after gtk_disable_setlocale
  if(init_gui)
  {
#ifdef GDK_WINDOWING_WAYLAND
    // There are currently bad interactions with Wayland (drop-downs
    // are very narrow, scroll events lost). Until this is fixed, give
    // priority to the XWayland backend for Wayland users.
    gdk_set_allowed_backends("x11,*");
#endif
    gtk_init(&argc, &argv);
  }

  // detect cpu features and decide which codepaths to enable
  dt_codepaths_init();

  // get the list of color profiles
  darktable.color_profiles = dt_colorspaces_init();

  // initialize the database
  darktable.db = dt_database_init(dbfilename_from_command, load_data);
  if(darktable.db == NULL)
  {
    printf("ERROR : cannot open database\n");
    return 1;
  }
  else if(!dt_database_get_lock_acquired(darktable.db))
  {
    gboolean image_loaded_elsewhere = FALSE;
#ifndef MAC_INTEGRATION
    // send the images to the other instance via dbus
    fprintf(stderr, "trying to open the images in the running instance\n");

    GDBusConnection *connection = NULL;
    for(int i = 1; i < argc; i++)
    {
      // make the filename absolute ...
      if(argv[i] == NULL || *argv[i] == '\0') continue;
      gchar *filename = dt_util_normalize_path(argv[i]);
      if(filename == NULL) continue;
      if(!connection) connection = g_bus_get_sync(G_BUS_TYPE_SESSION, NULL, NULL);
      // ... and send it to the running instance of darktable
      GVariant *reply = g_dbus_connection_call_sync(connection, "org.darktable.service", "/darktable",
                                                    "org.darktable.service.Remote", "Open",
                                                    g_variant_new("(s)", filename), NULL,
                                                    G_DBUS_CALL_FLAGS_NONE, -1, NULL, NULL);
      image_loaded_elsewhere = reply != NULL;
      // only success/failure matters; drop the reply so it is not leaked
      if(reply) g_variant_unref(reply);
      g_free(filename);
    }
    if(connection) g_object_unref(connection);
#endif

    if(!image_loaded_elsewhere) dt_database_show_error(darktable.db);

    return 1;
  }

  // Initialize the signal system
  darktable.signals = dt_control_signal_init();

  // Make sure that the database and xmp files are in sync
  // We need conf and db to be up and running for that which is the case here.
  // FIXME: is this also useful in non-gui mode?
  GList *changed_xmp_files = NULL;
  if(init_gui && dt_conf_get_bool("run_crawler_on_start"))
  {
    changed_xmp_files = dt_control_crawler_run();
  }

  if(init_gui)
  {
    dt_control_init(darktable.control);
  }
  else
  {
    if(dbfilename_from_command && !strcmp(dbfilename_from_command, ":memory:"))
      dt_gui_presets_init(); // init preset db schema.
    darktable.control->running = 0;
    darktable.control->accelerators = NULL;
    dt_pthread_mutex_init(&darktable.control->run_mutex, NULL);
  }

  // initialize collection query
  darktable.collection = dt_collection_new(NULL);

  /* initialize selection */
  darktable.selection = dt_selection_new();

  /* capabilities set to NULL */
  darktable.capabilities = NULL;

  // Initialize the password storage engine
  darktable.pwstorage = dt_pwstorage_new();

  darktable.guides = dt_guides_init();

#ifdef HAVE_GRAPHICSMAGICK
  /* GraphicsMagick init */
  InitializeMagick(darktable.progname);

  // *SIGH*
  dt_set_signal_handlers();
#endif

  darktable.opencl = (dt_opencl_t *)calloc(1, sizeof(dt_opencl_t));
#ifdef HAVE_OPENCL
  dt_opencl_init(darktable.opencl, exclude_opencl, print_statistics);
#endif

  darktable.points = (dt_points_t *)calloc(1, sizeof(dt_points_t));
  dt_points_init(darktable.points, dt_get_num_threads());

  darktable.noiseprofile_parser = dt_noiseprofile_init(noiseprofiles_from_command);

  // must come before mipmap_cache, because that one will need to access
  // image dimensions stored in here:
  darktable.image_cache = (dt_image_cache_t *)calloc(1, sizeof(dt_image_cache_t));
  dt_image_cache_init(darktable.image_cache);

  darktable.mipmap_cache = (dt_mipmap_cache_t *)calloc(1, sizeof(dt_mipmap_cache_t));
  dt_mipmap_cache_init(darktable.mipmap_cache);

  // The GUI must be initialized before the views, because the init()
  // functions of the views depend on darktable.control->accels_* to register
  // their keyboard accelerators

  if(init_gui)
  {
    darktable.gui = (dt_gui_gtk_t *)calloc(1, sizeof(dt_gui_gtk_t));
    if(dt_gui_gtk_init(darktable.gui)) return 1;
    dt_bauhaus_init();
  }
  else
    darktable.gui = NULL;

  darktable.view_manager = (dt_view_manager_t *)calloc(1, sizeof(dt_view_manager_t));
  dt_view_manager_init(darktable.view_manager);

  // check whether we were able to load darkroom view. if we failed, we'll crash everywhere later on.
  if(!darktable.develop) return 1;

  darktable.imageio = (dt_imageio_t *)calloc(1, sizeof(dt_imageio_t));
  dt_imageio_init(darktable.imageio);

  // load the darkroom mode plugins once:
  dt_iop_load_modules_so();

  if(init_gui)
  {
#ifdef HAVE_GPHOTO2
    // Initialize the camera control.
    // this is done late so that the gui can react to the signal sent but before switching to lighttable!
    darktable.camctl = dt_camctl_new();
#endif

    darktable.lib = (dt_lib_t *)calloc(1, sizeof(dt_lib_t));
    dt_lib_init(darktable.lib);

    dt_gui_gtk_load_config();

    // init the gui part of views
    dt_view_manager_gui_init(darktable.view_manager);
    // Loading the keybindings
    char keyfile[PATH_MAX] = { 0 };

    // First dump the default keymapping
    snprintf(keyfile, sizeof(keyfile), "%s/keyboardrc_default", datadir);
    gtk_accel_map_save(keyfile);

    // Removing extraneous semi-colons from the default keymap
    strip_semicolons_from_keymap(keyfile);

    // Then load any modified keys if available
    snprintf(keyfile, sizeof(keyfile), "%s/keyboardrc", datadir);
    if(g_file_test(keyfile, G_FILE_TEST_EXISTS))
      gtk_accel_map_load(keyfile);
    else
      gtk_accel_map_save(keyfile); // Save the default keymap if none is present

    // initialize undo struct
    darktable.undo = dt_undo_init();
  }

  if(darktable.unmuted & DT_DEBUG_MEMORY)
  {
    fprintf(stderr, "[memory] after successful startup\n");
    dt_print_mem_usage();
  }

  dt_image_local_copy_synch();

/* init lua last, since it's user made stuff it must be in the real environment */
#ifdef USE_LUA
  dt_lua_init(darktable.lua_state.state, lua_command);
#endif

  if(init_gui)
  {
    const char *mode = "lighttable";
    // april 1st: you have to earn using dt first! or know that you can switch views with keyboard shortcuts
    time_t now;
    time(&now);
    struct tm lt;
    localtime_r(&now, &lt);
    if(lt.tm_mon == 3 && lt.tm_mday == 1) mode = "knight";
    // we have to call dt_ctl_switch_mode_to() here already to not run into a lua deadlock.
    // having another call later is ok
    dt_ctl_switch_mode_to(mode);

#ifndef MAC_INTEGRATION
    // load image(s) specified on cmdline.
    // this has to happen after lua is initialized as image import can run lua code
    // If only one image is listed, attempt to load it in darkroom
    int last_id = 0;
    gboolean only_single_images = TRUE;
    int loaded_images = 0;

    for(int i = 1; i < argc; i++)
    {
      gboolean single_image = FALSE;
      if(argv[i] == NULL || *argv[i] == '\0') continue;
      int new_id = dt_load_from_string(argv[i], FALSE, &single_image);
      if(new_id > 0)
      {
        last_id = new_id;
        loaded_images++;
        if(!single_image) only_single_images = FALSE;
      }
    }

    if(loaded_images == 1 && only_single_images)
    {
      dt_control_set_mouse_over_id(last_id);
      dt_ctl_switch_mode_to("darkroom");
    }
#endif
  }

  // last but not least construct the popup that asks the user about images whose xmp files are newer than the
  // db entry
  if(init_gui && changed_xmp_files)
  {
    dt_control_crawler_show_image_list(changed_xmp_files);
  }

  dt_print(DT_DEBUG_CONTROL, "[init] startup took %f seconds\n", dt_get_wtime() - start_wtime);

  return 0;
}
Ejemplo n.º 22
0
Archivo: test2.c Proyecto: huahbo/lis
/*
 * Test driver for the LIS linear solver library.
 *
 * Usage: <program> m n matrix_type solution_filename rhistory_filename [options]
 *
 * Builds the m*n x m*n matrix of a five-point stencil (4 on the diagonal,
 * -1 for each existing grid neighbour) in CSR form, converts it to the
 * storage format selected by matrix_type, solves A x = b with b = A u and
 * u = (1,...,1), prints solver statistics on rank 0 and writes the solution
 * vector (argv[4]) and the residual history (argv[5]).
 *
 * Returns 0 on success. Every failing step goes through CHKERR.
 */
LIS_INT main(LIS_INT argc, char* argv[])
{
	LIS_MATRIX A0,A;
	LIS_VECTOR x,b,u;
	LIS_SOLVER solver;
	LIS_INT m,n,nn,nnz;
	LIS_INT	i,j,ii,jj,ctr;
	LIS_INT	is,ie;
	LIS_INT	nprocs,my_rank;
	int int_nprocs,int_my_rank;
	LIS_INT	nsol;
	LIS_INT	err,iter,mtype,iter_double,iter_quad;
	double time,itime,ptime,p_c_time,p_i_time;
	LIS_REAL resid;
	char solvername[128];
	LIS_INT	*ptr,*index;
	LIS_SCALAR *value;


	LIS_DEBUG_FUNC_IN;


	lis_initialize(&argc, &argv);

	/* MPI reports sizes as plain int, so go through int temporaries */
	#ifdef USE_MPI
		MPI_Comm_size(MPI_COMM_WORLD,&int_nprocs);
		MPI_Comm_rank(MPI_COMM_WORLD,&int_my_rank);
		nprocs = int_nprocs;
		my_rank = int_my_rank;
	#else
		nprocs  = 1;
		my_rank = 0;
	#endif

	if( argc < 6 )
	{
		if( my_rank==0 )
		{
			printf("Usage: %s m n matrix_type solution_filename rhistory_filename [options]\n", argv[0]);
		}
		CHKERR(1);
	}

	m  = atoi(argv[1]);
	n  = atoi(argv[2]);
	mtype  = atoi(argv[3]);
	if( m<=0 || n<=0 )
	{
#ifdef _LONGLONG
		if( my_rank==0 ) printf("m=%lld <=0 or n=%lld <=0\n",m,n);
#else
		if( my_rank==0 ) printf("m=%d <=0 or n=%d <=0\n",m,n);
#endif
		CHKERR(1);
	}

	if( my_rank==0 )
	{
		printf("\n");
#ifdef _LONGLONG
		printf("number of processes = %lld\n",nprocs);
#else
		printf("number of processes = %d\n",nprocs);
#endif
	}

#ifdef _OPENMP
	if( my_rank==0 )
	{
		/* The OpenMP query functions return int whatever LIS_INT is, so
		   %d is the matching conversion even in _LONGLONG builds. */
		printf("max number of threads = %d\n",omp_get_num_procs());
		printf("number of threads = %d\n",omp_get_max_threads());
	}
#endif

	/* create matrix and vectors */
	nn = m*n;
	err = lis_matrix_create(LIS_COMM_WORLD,&A);
	CHKERR(err);
	err = lis_matrix_set_size(A,0,nn);
	CHKERR(err);

	/* at most five entries per row: the diagonal plus four neighbours */
	ptr   = (LIS_INT *)malloc((A->n+1)*sizeof(LIS_INT));
	if( ptr==NULL ) CHKERR(1);
	index = (LIS_INT *)malloc(5*A->n*sizeof(LIS_INT));
	if( index==NULL ) CHKERR(1);
	value = (LIS_SCALAR *)malloc(5*A->n*sizeof(LIS_SCALAR));
	if( value==NULL ) CHKERR(1);

	/* fill the rows [is,ie) reported by lis_matrix_get_range */
	lis_matrix_get_range(A,&is,&ie);
	ctr = 0;
	for(ii=is;ii<ie;ii++)
	{
		/* global row ii corresponds to grid point (i,j), j running fastest */
		i = ii/m;
		j = ii - i*m;
		if( i>0 )   { jj = ii - m; index[ctr] = jj; value[ctr++] = -1.0;}
		if( i<n-1 ) { jj = ii + m; index[ctr] = jj; value[ctr++] = -1.0;}
		if( j>0 )   { jj = ii - 1; index[ctr] = jj; value[ctr++] = -1.0;}
		if( j<m-1 ) { jj = ii + 1; index[ctr] = jj; value[ctr++] = -1.0;}
		index[ctr] = ii; value[ctr++] = 4.0;
		ptr[ii-is+1] = ctr;
	}
	ptr[0] = 0;
	err = lis_matrix_set_csr(ptr[ie-is],ptr,index,value,A);
	CHKERR(err);
	err = lis_matrix_assemble(A);
	CHKERR(err);

	/* total number of nonzeros over all processes (i is reused as scratch) */
	nnz = A->nnz;
#ifdef USE_MPI
	MPI_Allreduce(&nnz,&i,1,LIS_MPI_INT,MPI_SUM,A->comm);
	nnz   = i;
#endif

#ifdef _LONGLONG
	if( my_rank==0 ) printf("matrix size = %lld x %lld (%lld nonzero entries)\n\n",nn,nn,nnz);
#else
	if( my_rank==0 ) printf("matrix size = %d x %d (%d nonzero entries)\n\n",nn,nn,nnz);
#endif

	/* convert the CSR matrix to the requested storage type and keep only that copy */
	err = lis_matrix_duplicate(A,&A0);
	CHKERR(err);
	lis_matrix_set_type(A0,mtype);
	err = lis_matrix_convert(A,A0);
	CHKERR(err);
	lis_matrix_destroy(A);
	A = A0;

	err = lis_vector_duplicate(A,&u);
	CHKERR(err);
	err = lis_vector_duplicate(A,&b);
	CHKERR(err);
	err = lis_vector_duplicate(A,&x);
	CHKERR(err);

	/* right-hand side b = A*u with u = (1,...,1) */
	err = lis_vector_set_all(1.0,u);
	CHKERR(err);
	lis_matvec(A,u,b);

	err = lis_solver_create(&solver); CHKERR(err);
	lis_solver_set_option("-print mem",solver);
	lis_solver_set_optionC(solver);

	err = lis_solve(A,b,x,solver);
	CHKERR(err);
	lis_solver_get_iterex(solver,&iter,&iter_double,&iter_quad);
	lis_solver_get_timeex(solver,&time,&itime,&ptime,&p_c_time,&p_i_time);
	lis_solver_get_residualnorm(solver,&resid);
	lis_solver_get_solver(solver,&nsol);
	lis_solver_get_solvername(nsol,solvername);
	if( my_rank==0 )
	{

#ifdef _LONGLONG
#ifdef _LONG__DOUBLE
		printf("%s: number of iterations = %lld \n",solvername, iter);
#else
		printf("%s: number of iterations = %lld (double = %lld, quad = %lld)\n",solvername,iter, iter_double, iter_quad);
#endif
#else
#ifdef _LONG__DOUBLE
		printf("%s: number of iterations = %d \n",solvername, iter);
#else
		printf("%s: number of iterations = %d (double = %d, quad = %d)\n",solvername,iter, iter_double, iter_quad);
#endif
#endif
		printf("%s: elapsed time         = %e sec.\n",solvername,time);
		printf("%s:   preconditioner     = %e sec.\n",solvername, ptime);
		printf("%s:     matrix creation  = %e sec.\n",solvername, p_c_time);
		printf("%s:   linear solver      = %e sec.\n",solvername, itime);
#ifdef _LONG__DOUBLE
		printf("%s: relative residual    = %Le\n\n",solvername,resid);
#else
		printf("%s: relative residual    = %e\n\n",solvername,resid);
#endif
	}

	/* write solution */
	lis_output_vector(x,LIS_FMT_MM,argv[4]);

	/* write residual history */
	lis_solver_output_rhistory(solver, argv[5]);

	lis_solver_destroy(solver);
	lis_matrix_destroy(A);
	lis_vector_destroy(b);
	lis_vector_destroy(x);
	lis_vector_destroy(u);

	lis_finalize();

	LIS_DEBUG_FUNC_OUT;
	return 0;
}
Ejemplo n.º 23
0
/*
  FT_ProExpn_VNA

  Fourier transform of the VNA separable projectors.

  Fills Spe_VNA_Bessel[spe][L][Mul][kj] and GL_NormK[kj] for kj in
  [0,GL_Mesh).  When Scf_RestartFromFile is set, the table is first read
  from "<filepath><filename>_rst/<filename>.ftPEvna"; if that file cannot
  be opened or is too short, the table is computed instead:

    Spe_VNA_Bessel[spe][L][Mul][kj]
       = 0.5*Dr * sum_i  r_i^2 * GL_Weight[i] * j_L(k_kj*r_i)
                         * PhiF(r_i, Projector_VNA[spe][L][Mul], ...)

  on the Gauss-Legendre radial grid RGL.  Each MPI process handles the
  species starting at Species_Top[myid], the results are exchanged with
  MPI_Bcast, and Host_ID writes the table back to the restart file.

  NOTE(review): every rank decides on its own whether the restart read
  succeeded, while the MPI_Bcast calls below are only reached when it
  failed -- confirm that all ranks always see the same file.
*/
void FT_ProExpn_VNA()
{
  int numprocs,myid,ID;
  int NumSpe;
  int L,i,kj;
  int Lspe,spe,Mul;
  int RestartRead_Succeed;
  int path_len;
  double Sr,Dr;
  double norm_k,dum0;
  double rmin,rmax,r;
  double kmin,kmax,Sk,Dk;
  double RGL[GL_Mesh + 2];
  double *SumTmp;
  double tmp0;
  double **SphB;
  double *tmp_SphB,*tmp_SphBp;
  double TStime, TEtime;
  /* for OpenMP */
  int OMPID,Nthrds,Nprocs;

  char fileFT[YOUSO10];
  FILE *fp;
  size_t size;

  dtime(&TStime);

  /* MPI */
  MPI_Comm_size(mpi_comm_level1,&numprocs);
  MPI_Comm_rank(mpi_comm_level1,&myid);

  if (myid==Host_ID && 0<level_stdout) printf("<FT_ProExpn_VNA>  Fourier transform of VNA separable projectors\n");

  RestartRead_Succeed = 0;

  /***********************************************************
     In case of Scf_RestartFromFile==1, read Spe_VNA_Bessel
  ***********************************************************/

  if (Scf_RestartFromFile){

    /****************************************************
         regenerate radial grids in the k-space
         for the MPI calculation
    ****************************************************/

    kmin = Radial_kmin;
    kmax = PAO_Nkmax;
    Sk = kmax + kmin;
    Dk = kmax - kmin;

    for (kj=0; kj<GL_Mesh; kj++){
      norm_k = 0.50*(Dk*GL_Abscissae[kj] + Sk);
      GL_NormK[kj] = norm_k;
    }

    /***********************************************************
                        read Spe_VNA_Bessel
    ***********************************************************/

    path_len = snprintf(fileFT,sizeof(fileFT),"%s%s_rst/%s.ftPEvna",filepath,filename,filename);

    /* a truncated name would refer to some other file,
       so it is handled like a failed open */
    fp = NULL;
    if (0<=path_len && (size_t)path_len<sizeof(fileFT)) fp = fopen(fileFT,"rb");

    if (fp != NULL){

      RestartRead_Succeed = 1;

      for (spe=0; spe<SpeciesNum; spe++){
        for (L=0; L<=List_YOUSO[35]; L++){
          for (Mul=0; Mul<List_YOUSO[34]; Mul++){

            /* a short read invalidates the whole file -> recompute below */
            size = fread(&Spe_VNA_Bessel[spe][L][Mul][0],sizeof(double),GL_Mesh,fp);
            if (size!=GL_Mesh) RestartRead_Succeed = 0;
          }
        }
      }

      fclose(fp);
    }
    else{
      printf("Could not open a file %s in FT_ProExpn_VNA\n",fileFT);
    }
  }

  /***********************************************************
     if (RestartRead_Succeed==0), calculate Spe_VNA_Bessel
  ***********************************************************/

  if (RestartRead_Succeed==0){

    for (Lspe=0; Lspe<MSpeciesNum; Lspe++){

      spe = Species_Top[myid] + Lspe;

      /* initalize */
      /* tabulation on Gauss-Legendre radial grid */

      rmin = Spe_VPS_RV[spe][0];
      rmax = Spe_Atom_Cut1[spe] + 0.5;
      Sr = rmax + rmin;
      Dr = rmax - rmin;
      for (i=0; i<GL_Mesh; i++){
        RGL[i] = 0.50*(Dr*GL_Abscissae[i] + Sr);
      }

      kmin = Radial_kmin;
      kmax = PAO_Nkmax;
      Sk = kmax + kmin;
      Dk = kmax - kmin;

      /* loop for kj */

#pragma omp parallel shared(List_YOUSO,GL_Weight,GL_Abscissae,Dr,Dk,Sk,RGL,Projector_VNA,Spe_VPS_RV,Spe_Num_Mesh_VPS,Spe_VNA_Bessel)  private(SumTmp,SphB,tmp_SphB,tmp_SphBp,OMPID,Nthrds,Nprocs,kj,norm_k,i,r,L,Mul,tmp0,dum0)

      {

        /* allocate arrays (one private set per thread) */

        SumTmp = (double*)malloc(sizeof(double)*List_YOUSO[34]);

        SphB = (double**)malloc(sizeof(double*)*(List_YOUSO[35]+3));
        for(L=0; L<(List_YOUSO[35]+3); L++){
          SphB[L] = (double*)malloc(sizeof(double)*GL_Mesh);
        }

        tmp_SphB  = (double*)malloc(sizeof(double)*(List_YOUSO[35]+3));
        tmp_SphBp = (double*)malloc(sizeof(double)*(List_YOUSO[35]+3));

        /* get info. on OpenMP */

        OMPID = omp_get_thread_num();
        Nthrds = omp_get_num_threads();
        Nprocs = omp_get_num_procs();

        /* thread OMPID handles kj = OMPID, OMPID+Nthrds, ... */

        for ( kj=OMPID; kj<GL_Mesh; kj+=Nthrds ){

          norm_k = 0.50*(Dk*GL_Abscissae[kj] + Sk);

          /* calculate SphB */

          for (i=0; i<GL_Mesh; i++){

            r = RGL[i];
            Spherical_Bessel(norm_k*r,List_YOUSO[35],tmp_SphB,tmp_SphBp);

            for(L=0; L<=List_YOUSO[35]; L++){
              SphB[L][i]  =  tmp_SphB[L];
            }
          }

          /* loop for L */

          for (L=0; L<=List_YOUSO[35]; L++){

            /****************************************************
                      \int jL(k*r)RL r^2 dr
            ****************************************************/

            for (Mul=0; Mul<List_YOUSO[34]; Mul++) SumTmp[Mul] = 0.0;

            /* Gauss-Legendre quadrature */

            for (i=0; i<GL_Mesh; i++){

              r = RGL[i];

              tmp0 = r*r*GL_Weight[i]*SphB[L][i];
              for (Mul=0; Mul<List_YOUSO[34]; Mul++){
                dum0 = PhiF(r, Projector_VNA[spe][L][Mul], Spe_VPS_RV[spe], Spe_Num_Mesh_VPS[spe]);
                SumTmp[Mul] += dum0*tmp0;
              }
            }

            /* 0.5*Dr rescales the quadrature sum to the interval [rmin,rmax] */

            for (Mul=0; Mul<List_YOUSO[34]; Mul++){
              Spe_VNA_Bessel[spe][L][Mul][kj] = 0.5*Dr*SumTmp[Mul];
            }

          } /* L */

        } /* kj */

        /* free arrays */

        free(SumTmp);

        for(L=0; L<(List_YOUSO[35]+3); L++){
          free(SphB[L]);
        }
        free(SphB);

        free(tmp_SphB);
        free(tmp_SphBp);

#pragma omp flush(Spe_VNA_Bessel)

      } /* #pragma omp parallel */
    } /* Lspe */

    /****************************************************
         regenerate radial grids in the k-space
         for the MPI calculation
    ****************************************************/

    kmin = Radial_kmin;
    kmax = PAO_Nkmax;
    Sk = kmax + kmin;
    Dk = kmax - kmin;

    for (kj=0; kj<GL_Mesh; kj++){
      norm_k = 0.50*(Dk*GL_Abscissae[kj] + Sk);
      GL_NormK[kj] = norm_k;
    }

    /***********************************************************
        sending and receiving of Spe_VNA_Bessel by MPI
    ***********************************************************/

    for (ID=0; ID<Num_Procs2; ID++){
      NumSpe = Species_End[ID] - Species_Top[ID] + 1;
      for (Lspe=0; Lspe<NumSpe; Lspe++){
        spe = Species_Top[ID] + Lspe;
        for (L=0; L<=List_YOUSO[35]; L++){
          for (Mul=0; Mul<List_YOUSO[34]; Mul++){
            MPI_Bcast(&Spe_VNA_Bessel[spe][L][Mul][0],
                      GL_Mesh,MPI_DOUBLE,ID,mpi_comm_level1);
          }
        }
      }
    }

    /***********************************************************
                      save Spe_VNA_Bessel
    ***********************************************************/

    if (myid==Host_ID){

      path_len = snprintf(fileFT,sizeof(fileFT),"%s%s_rst/%s.ftPEvna",filepath,filename,filename);

      /* never write to a truncated (i.e. different) file name */
      fp = NULL;
      if (0<=path_len && (size_t)path_len<sizeof(fileFT)) fp = fopen(fileFT,"wb");

      if (fp != NULL){

        for (spe=0; spe<SpeciesNum; spe++){
          for (L=0; L<=List_YOUSO[35]; L++){
            for (Mul=0; Mul<List_YOUSO[34]; Mul++){
              fwrite(&Spe_VNA_Bessel[spe][L][Mul][0],sizeof(double),GL_Mesh,fp);
            }
          }
        }

        fclose(fp);
      }
      else{
        printf("Could not open a file %s in FT_ProExpn_VNA\n",fileFT);
      }
    }

  } /* if (RestartRead_Succeed==0) */

  /***********************************************************
                         elapsed time
  ***********************************************************/

  dtime(&TEtime);

  /*
  printf("myid=%2d Elapsed Time (s) = %15.12f\n",myid,TEtime-TStime);
  MPI_Finalize();
  exit(0);
  */

}
Ejemplo n.º 24
0
/**
 * Initialise the global `darktable` state.
 *
 * Parses the command line, sets up the data/plugin/tmp/config/cache
 * locations, the configuration backend, the database and the remaining
 * subsystems in dependency order. With init_gui set it also brings up the
 * GUI, the libs, the key bindings and dbus, and loads the images named on
 * the command line.
 *
 * If the database is already locked by another instance, the image
 * arguments are forwarded to that instance over D-Bus and the function
 * returns non-zero.
 *
 * @param argc      number of command line arguments
 * @param argv      command line arguments; argv[0] becomes darktable.progname
 * @param init_gui  non-zero to initialise the GUI related subsystems as well
 * @return 0 on success; non-zero when startup must not continue (usage or
 *         version was printed, SSE2 is missing, the temporary directory is
 *         invalid, the database cannot be opened or is locked, or GUI
 *         initialisation failed)
 */
int dt_init(int argc, char *argv[], const int init_gui)
{
  // make everything go a lot faster.
  _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
#ifndef __APPLE__
  _dt_sigsegv_old_handler = signal(SIGSEGV,&_dt_sigsegv_handler);
#endif

#ifndef __SSE2__
  fprintf(stderr, "[dt_init] unfortunately we depend on SSE2 instructions at this time.\n");
  fprintf(stderr, "[dt_init] please contribute a backport patch (or buy a newer processor).\n");
  return 1;
#endif

#ifdef M_MMAP_THRESHOLD
  mallopt(M_MMAP_THRESHOLD,128*1024) ; /* use mmap() for large allocations */
#endif

  bindtextdomain (GETTEXT_PACKAGE, DARKTABLE_LOCALEDIR);
  bind_textdomain_codeset (GETTEXT_PACKAGE, "UTF-8");
  textdomain (GETTEXT_PACKAGE);


  // init all pointers to 0:
  memset(&darktable, 0, sizeof(darktable_t));

  darktable.progname = argv[0];

  // database
  gchar *dbfilename_from_command = NULL;
  char *datadir_from_command = NULL;
  char *moduledir_from_command = NULL;
  char *tmpdir_from_command = NULL;
  char *configdir_from_command = NULL;
  char *cachedir_from_command = NULL;

  darktable.num_openmp_threads = 1;
#ifdef _OPENMP
  darktable.num_openmp_threads = omp_get_num_procs();
#endif
  darktable.unmuted = 0;
  GSList *images_to_load = NULL;
  for(int k=1; k<argc; k++)
  {
    if(argv[k][0] == '-')
    {
      if(!strcmp(argv[k], "--help") || !strcmp(argv[k], "-h"))
      {
        return usage(argv[0]);
      }
      else if(!strcmp(argv[k], "--version"))
      {
        printf("this is "PACKAGE_STRING"\ncopyright (c) 2009-2013 johannes hanika\n"PACKAGE_BUGREPORT"\n");
        return 1;
      }
      else if(!strcmp(argv[k], "--library"))
      {
        dbfilename_from_command = argv[++k];
      }
      else if(!strcmp(argv[k], "--datadir"))
      {
        datadir_from_command = argv[++k];
      }
      else if(!strcmp(argv[k], "--moduledir"))
      {
        moduledir_from_command = argv[++k];
      }
      else if(!strcmp(argv[k], "--tmpdir"))
      {
        tmpdir_from_command = argv[++k];
      }
      else if(!strcmp(argv[k], "--configdir"))
      {
        configdir_from_command = argv[++k];
      }
      else if(!strcmp(argv[k], "--cachedir"))
      {
        cachedir_from_command = argv[++k];
      }
      else if(!strcmp(argv[k], "--localedir"))
      {
        bindtextdomain (GETTEXT_PACKAGE, argv[++k]);
      }
      else if(argv[k][1] == 'd' && argc > k+1)
      {
        if(!strcmp(argv[k+1], "all"))             darktable.unmuted = 0xffffffff;   // enable all debug information
        else if(!strcmp(argv[k+1], "cache"))      darktable.unmuted |= DT_DEBUG_CACHE;   // enable debugging for lib/film/cache module
        else if(!strcmp(argv[k+1], "control"))    darktable.unmuted |= DT_DEBUG_CONTROL; // enable debugging for scheduler module
        else if(!strcmp(argv[k+1], "dev"))        darktable.unmuted |= DT_DEBUG_DEV; // develop module
        else if(!strcmp(argv[k+1], "fswatch"))    darktable.unmuted |= DT_DEBUG_FSWATCH; // fswatch module
        else if(!strcmp(argv[k+1], "camctl"))     darktable.unmuted |= DT_DEBUG_CAMCTL; // camera control module
        else if(!strcmp(argv[k+1], "perf"))       darktable.unmuted |= DT_DEBUG_PERF; // performance measurements
        else if(!strcmp(argv[k+1], "pwstorage"))  darktable.unmuted |= DT_DEBUG_PWSTORAGE; // pwstorage module
        else if(!strcmp(argv[k+1], "opencl"))     darktable.unmuted |= DT_DEBUG_OPENCL;    // gpu accel via opencl
        else if(!strcmp(argv[k+1], "sql"))        darktable.unmuted |= DT_DEBUG_SQL; // SQLite3 queries
        else if(!strcmp(argv[k+1], "memory"))     darktable.unmuted |= DT_DEBUG_MEMORY; // some stats on mem usage now and then.
        else if(!strcmp(argv[k+1], "lighttable")) darktable.unmuted |= DT_DEBUG_LIGHTTABLE; // lighttable related stuff.
        else if(!strcmp(argv[k+1], "nan"))        darktable.unmuted |= DT_DEBUG_NAN; // check for NANs when processing the pipe.
        else return usage(argv[0]);
        k ++;
      }
      else if(argv[k][1] == 't' && argc > k+1)
      {
        // parse once: CLAMP is a macro and would evaluate atol() repeatedly
        const long requested_threads = atol(argv[k+1]);
        darktable.num_openmp_threads = CLAMP(requested_threads, 1, 100);
        printf("[dt_init] using %d threads for openmp parallel sections\n", darktable.num_openmp_threads);
        k ++;
      }
    }
#ifndef MAC_INTEGRATION
    else
    {
      images_to_load = g_slist_append(images_to_load, argv[k]);
    }
#endif
  }

  if(darktable.unmuted & DT_DEBUG_MEMORY)
  {
    fprintf(stderr, "[memory] at startup\n");
    dt_print_mem_usage();
  }

#ifdef _OPENMP
  omp_set_num_threads(darktable.num_openmp_threads);
#endif
  dt_loc_init_datadir(datadir_from_command);
  dt_loc_init_plugindir(moduledir_from_command);
  if(dt_loc_init_tmp_dir(tmpdir_from_command))
  {
    printf(_("ERROR : invalid temporary directory : %s\n"),darktable.tmpdir);
    return usage(argv[0]);
  }
  dt_loc_init_user_config_dir(configdir_from_command);
  dt_loc_init_user_cache_dir(cachedir_from_command);

#if !GLIB_CHECK_VERSION(2, 35, 0)
  g_type_init();
#endif

  // does not work, as gtk is not inited yet.
  // even if it were, it's a super bad idea to invoke gtk stuff from
  // a signal handler.
  /* check cput caps */
  // dt_check_cpu(argc,argv);

#ifdef HAVE_GEGL
  {
    // own scope: a second buffer named datadir is declared further down
    char geglpath[DT_MAX_PATH_LEN];
    char gegl_datadir[DT_MAX_PATH_LEN];
    dt_loc_get_datadir(gegl_datadir, DT_MAX_PATH_LEN);
    snprintf(geglpath, DT_MAX_PATH_LEN, "%s/gegl:/usr/lib/gegl-0.0", gegl_datadir);
    (void)setenv("GEGL_PATH", geglpath, 1);
    gegl_init(&argc, &argv);
  }
#endif

  // thread-safe init:
  dt_exif_init();
  // note: from here on datadir holds the user config directory
  char datadir[DT_MAX_PATH_LEN];
  dt_loc_get_user_config_dir (datadir,DT_MAX_PATH_LEN);
  char filename[DT_MAX_PATH_LEN];
  snprintf(filename, DT_MAX_PATH_LEN, "%s/darktablerc", datadir);

  // intialize the config backend. this needs to be done first...
  darktable.conf = (dt_conf_t *)calloc(1, sizeof(dt_conf_t));
  dt_conf_init(darktable.conf, filename);

  // set the interface language
  const gchar* lang = dt_conf_get_string("ui_last/gui_language");
  if(lang != NULL && lang[0] != '\0')
  {
    if(setlocale(LC_ALL, lang) != NULL)
      gtk_disable_setlocale();
  }

  // initialize the database
  darktable.db = dt_database_init(dbfilename_from_command);
  if(darktable.db == NULL)
  {
    printf("ERROR : cannot open database\n");
    return 1;
  }
  else if(dt_database_get_already_locked(darktable.db))
  {
    // send the images to the other instance via dbus
    if(images_to_load)
    {
      // get a connection!
      GDBusConnection *connection = g_bus_get_sync(G_BUS_TYPE_SESSION,NULL, NULL);

      // without a session bus there is nobody to forward to
      if(connection != NULL)
      {
        // the for-increment advances the list even when an entry is skipped
        for(GSList *p = images_to_load; p != NULL; p = g_slist_next(p))
        {
          // make the filename absolute ...
          gchar *abs_path = dt_make_path_absolute((gchar*)p->data);
          if(abs_path == NULL) continue;
          // ... and send it to the running instance of darktable
          GVariant *reply = g_dbus_connection_call_sync(connection,
                                                        "org.darktable.service",
                                                        "/darktable",
                                                        "org.darktable.service.Remote",
                                                        "Open",
                                                        g_variant_new ("(s)", abs_path),
                                                        NULL,
                                                        G_DBUS_CALL_FLAGS_NONE,
                                                        -1,
                                                        NULL,
                                                        NULL);
          // the reply is owned by us; NULL means the call failed
          if(reply != NULL) g_variant_unref(reply);
          g_free(abs_path);
        }

        g_object_unref(connection);
      }

      g_slist_free(images_to_load);
    }

    return 1;
  }

  // Initialize the signal system
  darktable.signals = dt_control_signal_init();

  // Initialize the filesystem watcher
  darktable.fswatch=dt_fswatch_new();

#ifdef HAVE_GPHOTO2
  // Initialize the camera control
  darktable.camctl=dt_camctl_new();
#endif

  // get max lighttable thumbnail size:
  darktable.thumbnail_width  = CLAMPS(dt_conf_get_int("plugins/lighttable/thumbnail_width"),  200, 3000);
  darktable.thumbnail_height = CLAMPS(dt_conf_get_int("plugins/lighttable/thumbnail_height"), 200, 3000);
  // and make sure it can be mip-mapped all the way from mip4 to mip0
  // (integer divide then multiply rounds down to a multiple of 16)
  darktable.thumbnail_width  /= 16;
  darktable.thumbnail_width  *= 16;
  darktable.thumbnail_height /= 16;
  darktable.thumbnail_height *= 16;

  // Initialize the password storage engine
  darktable.pwstorage=dt_pwstorage_new();

  // FIXME: move there into dt_database_t
  dt_pthread_mutex_init(&(darktable.db_insert), NULL);
  dt_pthread_mutex_init(&(darktable.plugin_threadsafe), NULL);
  dt_pthread_mutex_init(&(darktable.capabilities_threadsafe), NULL);
  darktable.control = (dt_control_t *)calloc(1, sizeof(dt_control_t));
  if(init_gui)
  {
    dt_control_init(darktable.control);
  }
  else
  {
    // this is in memory, so schema can't exist yet.
    if(dbfilename_from_command && !strcmp(dbfilename_from_command, ":memory:"))
    {
      dt_control_create_database_schema();
      dt_gui_presets_init(); // also init preset db schema.
    }
    darktable.control->running = 0;
    darktable.control->accelerators = NULL;
    dt_pthread_mutex_init(&darktable.control->run_mutex, NULL);
  }

  // initialize collection query
  darktable.collection_listeners = NULL;
  darktable.collection = dt_collection_new(NULL);

  /* initialize sellection */
  darktable.selection = dt_selection_new();

  /* capabilities set to NULL */
  darktable.capabilities = NULL;

#ifdef HAVE_GRAPHICSMAGICK
  /* GraphicsMagick init */
  InitializeMagick(darktable.progname);
#endif

  darktable.opencl = (dt_opencl_t *)calloc(1, sizeof(dt_opencl_t));
  dt_opencl_init(darktable.opencl, argc, argv);

  darktable.blendop = (dt_blendop_t *)calloc(1, sizeof(dt_blendop_t));
  dt_develop_blend_init(darktable.blendop);

  darktable.points = (dt_points_t *)calloc(1, sizeof(dt_points_t));
  dt_points_init(darktable.points, dt_get_num_threads());

  // must come before mipmap_cache, because that one will need to access
  // image dimensions stored in here:
  darktable.image_cache = (dt_image_cache_t *)calloc(1, sizeof(dt_image_cache_t));
  dt_image_cache_init(darktable.image_cache);

  darktable.mipmap_cache = (dt_mipmap_cache_t *)calloc(1, sizeof(dt_mipmap_cache_t));
  dt_mipmap_cache_init(darktable.mipmap_cache);

  // The GUI must be initialized before the views, because the init()
  // functions of the views depend on darktable.control->accels_* to register
  // their keyboard accelerators

  if(init_gui)
  {
    darktable.gui = (dt_gui_gtk_t *)calloc(1, sizeof(dt_gui_gtk_t));
    if(dt_gui_gtk_init(darktable.gui, argc, argv)) return 1;
    dt_bauhaus_init();
  }
  else darktable.gui = NULL;

  darktable.view_manager = (dt_view_manager_t *)calloc(1, sizeof(dt_view_manager_t));
  dt_view_manager_init(darktable.view_manager);

  // load the darkroom mode plugins once:
  dt_iop_load_modules_so();

  if(init_gui)
  {
    darktable.lib = (dt_lib_t *)calloc(1, sizeof(dt_lib_t));
    dt_lib_init(darktable.lib);

    dt_control_load_config(darktable.control);
    g_strlcpy(darktable.control->global_settings.dbname, filename, 512); // overwrite if relocated.
  }
  darktable.imageio = (dt_imageio_t *)calloc(1, sizeof(dt_imageio_t));
  dt_imageio_init(darktable.imageio);

  if(init_gui)
  {
    // Loading the keybindings
    char keyfile[DT_MAX_PATH_LEN];

    // First dump the default keymapping
    snprintf(keyfile, DT_MAX_PATH_LEN, "%s/keyboardrc_default", datadir);
    gtk_accel_map_save(keyfile);

    // Removing extraneous semi-colons from the default keymap
    strip_semicolons_from_keymap(keyfile);

    // Then load any modified keys if available
    snprintf(keyfile, DT_MAX_PATH_LEN, "%s/keyboardrc", datadir);
    if(g_file_test(keyfile, G_FILE_TEST_EXISTS))
      gtk_accel_map_load(keyfile);
    else
      gtk_accel_map_save(keyfile); // Save the default keymap if none is present

    // I doubt that connecting to dbus for darktable-cli makes sense
    darktable.dbus = dt_dbus_init();

    // initialize undo struct
    darktable.undo = dt_undo_init();

    // load image(s) specified on cmdline
    int id = 0;
    if(images_to_load)
    {
      // If only one image is listed, attempt to load it in darkroom
      gboolean load_in_dr = (g_slist_next(images_to_load) == NULL);
      GSList *p = images_to_load;

      while (p != NULL)
      {
        // don't put these function calls into MAX(), the macro will evaluate
        // it twice (and happily deadlock, in this particular case)
        int newid = dt_load_from_string((gchar*)p->data, load_in_dr);
        id = MAX(id, newid);
        p = g_slist_next(p);
      }

      // fall back to the library view unless the single image was loaded
      if (!load_in_dr || id == 0)
        dt_ctl_switch_mode_to(DT_LIBRARY);

      g_slist_free(images_to_load);
    }
    else
      dt_ctl_switch_mode_to(DT_LIBRARY);
  }

  /* start the indexer background job */
  dt_control_start_indexer();

  if(darktable.unmuted & DT_DEBUG_MEMORY)
  {
    fprintf(stderr, "[memory] after successful startup\n");
    dt_print_mem_usage();
  }

  return 0;
}
Ejemplo n.º 25
0
int main ( int argc, char *argv[] )

/******************************************************************************/
/*
  Purpose:

    MAIN drives the SCHEDULE_OPENMP demonstration.

  Discussion:

    For N = 1, 2, 4, ..., 131072 the primes up to N are counted three
    times, by prime_default, prime_static and prime_dynamic, and the
    wall clock time of each call is printed next to the count.

    The three routines are meant to show how default, static and dynamic
    OpenMP loop scheduling cope with an unbalanced work load.  According
    to the original notes, a default split between two threads gives the
    second thread about 3 times the work of the first, while static or
    dynamic scheduling evens the load out, which could be expected to
    cut the loop time by about 1/3.

    The count shown in each row is the one returned by prime_dynamic,
    the last routine called.

  Licensing:

    This code is distributed under the GNU LGPL license.

  Modified:

    10 July 2010

  Author:

    John Burkardt

  Return:

    Always 0.
*/
{
  const int first_n = 1;
  const int last_n = 131072;
  const int growth = 2;
  int limit;

/*
  Banner and environment report.
*/
  printf ( "\n" );
  printf ( "SCHEDULE_OPENMP\n" );
  printf ( "  C/OpenMP version\n" );
  printf ( "  Count the primes from 1 to N.\n" );
  printf ( "  This is an unbalanced work load, particular for two threads.\n" );
  printf ( "  Demonstrate default, static and dynamic scheduling.\n" );
  printf ( "\n" );
  printf ( "  Number of processors available = %d\n", omp_get_num_procs ( )  );
  printf ( "  Number of threads =              %d\n", omp_get_max_threads ( )  );

/*
  Table header.
*/
  printf ( "\n" );
  printf ( "                           Default        Static       Dynamic\n" );
  printf ( "         N     Pi(N)          Time          Time          Time\n" );
  printf ( "\n" );

/*
  One table row per problem size; the size doubles every pass.
*/
  for ( limit = first_n; limit <= last_n; limit *= growth )
  {
    int count;
    double start;
    double default_seconds;
    double static_seconds;
    double dynamic_seconds;

    start = omp_get_wtime ( );
    count = prime_default ( limit );
    default_seconds = omp_get_wtime ( ) - start;

    start = omp_get_wtime ( );
    count = prime_static ( limit );
    static_seconds = omp_get_wtime ( ) - start;

    start = omp_get_wtime ( );
    count = prime_dynamic ( limit );
    dynamic_seconds = omp_get_wtime ( ) - start;

    printf ( "  %8d  %8d  %12f  %12f  %12f\n", limit, count,
      default_seconds, static_seconds, dynamic_seconds );
  }

/*
  Terminate.
*/
  printf ( "\n" );
  printf ( "SCHEDULE_OPENMP\n" );
  printf ( "  Normal end of execution.\n" );

  return 0;
}
Ejemplo n.º 26
0
int main(int argc, char* argv[]) {

  const char* program_name = "contact_profile";
  bool optsOK = true;
  gmx::initForCommandLine(&argc,&argv);
  copyright(program_name);
  cout << "   Computes the standard atomic contacts for structures in" << endl;
  cout << "   the given xtc file. A topology PDB file and atom index file" << endl;
  cout << "   should be provided for determining the atoms to compare." << endl;
  cout << "   The resulting sparse contact distance profiles are" << endl;
  cout << "   in sparse vector format (index-file and data-file)." << endl;
  cout << endl;
  cout << "   Use -h or --help to see the complete list of options." << endl;
  cout << endl;

  // Option vars...
  int nthreads = 0;
  double sigma;
  double eps;
  string top_filename;
  string xtc_filename;
  string ndx_filename;
  const char* ndx_filename_ptr = NULL;
  string index_filename;
  string data_filename;

  // Declare the supported options.
  po::options_description cmdline_options;
  po::options_description program_options("Program options");
  program_options.add_options()
    ("help,h", "show this help message and exit")
    ("threads,t", po::value<int>(&nthreads)->default_value(omp_get_max_threads()>omp_get_num_procs()?omp_get_num_procs():omp_get_max_threads()), "Input:  Number of threads to start (int)")
    ("epsilon,e", po::value<double>(&eps)->default_value(9.0), "Input:  Contact cutoff (real)")
    //    ("sigma,q", po::value<double>(&sigma)->default_value(1), "Input:  Standard deviation of gaussian kernel (real)")
    ("topology-file,p", po::value<string>(&top_filename)->default_value("topology.pdb"), "Input:  Topology file [.pdb,.gro,.tpr] (string:filename)")
    ("xtc-file,x", po::value<string>(&xtc_filename)->default_value("traj.xtc"), "Input:  Trajectory file (string:filename)")
    ("ndx-file,n", po::value<string>(&ndx_filename), "Input: K-nn distances file (string:filename)")
    ("index-file,i", po::value<string>(&index_filename)->default_value("reference.svi"), "Output: Sparse vector indices file (string:filename)")    
    ("data-file,d", po::value<string>(&data_filename)->default_value("reference.svd"), "Output: Sparse vector data file (string:filename)")    
    ;
  cmdline_options.add(program_options);

  po::variables_map vm;
  po::store(po::parse_command_line(argc, argv, cmdline_options), vm);
  po::notify(vm);    

  if (vm.count("help")) {
    cout << "usage: " << program_name << " [options]" << endl;
    cout << cmdline_options << endl;
    return 1;
  }
  if (vm.count("ndx-file")) {
    ndx_filename_ptr = ndx_filename.c_str();
  }

  if (!optsOK) {
    return -1;
  }

  cout << "Running with the following options:" << endl;
  cout << "threads =       " << nthreads << endl;
  cout << "topology-file = " << top_filename << endl;
  cout << "xtc-file =      " << xtc_filename << endl;
  cout << "ndx-file =      " << ndx_filename << endl;
  cout << "index-file =    " << index_filename << endl;
  cout << "data-file =     " << data_filename << endl;
  cout << endl;
  
  // Local vars
  int step = 1;
  float time = 0.0;
  matrix box;
  float prec = 0.001;
  char buf[256];
  t_topology top;
  int ePBC;
  int natoms = 0;
  int nframes= 0;
  int update_interval = 1;
  t_fileio *ref_file;
  rvec *mycoords = NULL;
  gmx_bool bOK = 1;
  double *contact = NULL;
  vector<coord_array> *ref_coords = NULL;
  ::real *weights = NULL;
  int        gnx1,gnx2;
  atom_id    *index1,*index2;
  char       *grpname1,*grpname2;
  ofstream   index;
  ofstream   data;

  // Remove C stdout (silly GROMACS warnings going every which stream!)
  int myout = dup(1);
  dup2(2,1);

  // Setup threads
  omp_set_num_threads(nthreads);

  // Get number of atoms and check xtc
  cout << "Reading topology information from " << top_filename << " ... ";
  read_tps_conf(top_filename.c_str(), buf, &top, &ePBC, &mycoords,
		NULL, box, TRUE);
  cout << "done." << endl;
  delete [] mycoords;

  ref_file = open_xtc(xtc_filename.c_str(),"r");
  read_first_xtc(ref_file,&natoms, &step, &time, box, &mycoords, &prec, &bOK);
  close_xtc(ref_file);
  if (natoms != top.atoms.nr) {
    cout << "*** ERROR ***" << endl;
    cout << "Number of atoms in topology file ("
	 << top.atoms.nr << ") "
	 << "does not match the number of atoms "
	 << "in the XTC file (" << xtc_filename << " : " << natoms << ")."
	 << endl;
    exit(4);
  }

  // Get atom selections
  cout << "Please select two (non-overlapping) groups for contact profiling..." << endl;
  get_index(&top.atoms,ndx_filename_ptr,1,&gnx1,&index1,&grpname1);
  cout << endl;
  get_index(&top.atoms,ndx_filename_ptr,1,&gnx2,&index2,&grpname2);
  cout << endl;

  cout << "Total grid size is " << gnx1 << " x " << gnx2 << " = " << (gnx1*gnx2) << endl;

  // Read coordinates and weight-center all structures
  cout << "Reading reference coordinates from file: " << xtc_filename << " ... ";
  ref_coords = new vector<coord_array>;
  ref_file = open_xtc(xtc_filename.c_str(),"r");
  mycoords = new rvec[natoms];
  while (read_next_xtc(ref_file, natoms, &step, &time, box, mycoords, &prec, &bOK)) {
    ref_coords->push_back(mycoords);
    mycoords = new rvec[natoms];
  }
  close_xtc(ref_file);
  delete [] mycoords;
  mycoords = NULL;
  nframes = ref_coords->size();
  cout << "done." << endl;

  // Allocate vectors for storing the distances for a structure
  contact = new double[gnx1*gnx2];
  weights = new ::real[gnx1*gnx2];
  for (int x = 0; x < natoms; x++) weights[x] = top.atoms.atom[x].m;

#pragma omp parallel for
  for (int i = 0; i < gnx1; i++)
    for (int j = 0; j < gnx2; j++) {
      weights[(i*gnx2)+j] = top.atoms.atom[index1[i]].m * top.atoms.atom[index2[j]].m;
    }

  // Restore C stdout.
  dup2(myout,1);

  index.open(index_filename.c_str());
  data.open(data_filename.c_str());

  // Timer for ETA
  time_t start = std::time(0);
  time_t last = start;

  // Compute fits
  for (int frame = 0; frame < nframes; frame++) {

    // Update user of progress
    if (std::time(0) - last > update_interval) {
      last = std::time(0);
      time_t eta = start + ((last-start) * nframes / frame);
      cout << "\rFrame: " << frame << ", will finish " 
	   << string(std::ctime(&eta)).substr(0,20);
      cout.flush();
    }

    // Do Work
#pragma omp parallel for
    for (int i = 0; i < gnx1*gnx2; i++)
      contact[i] = 0.0;

#pragma omp parallel for
    for (int i = 0; i < gnx1; i++) {
      int ii = index1[i];
      for (int j = 0; j < gnx2; j++) {
	int jj = index2[j];
	double d = 0.0;
	for (int k = 0; k < 3; k++)
	  d += (((*ref_coords)[frame][ii][k] - (*ref_coords)[frame][jj][k]) *
		((*ref_coords)[frame][ii][k] - (*ref_coords)[frame][jj][k]));
	d = sqrt(d) * 10.0;
	// d = exp(-(d*d) / (2.0 * weights[(i*gnx2)+j]));
	// if (d > eps)
	//   contact[(i*gnx2)+j] = d;
	if (d < eps)
	  contact[(i*gnx2)+j] = 1.0;
      } // j
    } // i


    double sum = 0.0;
#pragma omp parallel for reduction(+:sum)
    for (int i = 0; i < gnx1*gnx2; i++)
      sum += contact[i];

    sum = 1.0; // No normalization...
    int total = 0;
#pragma omp parallel for reduction(+:total)
    for (int i = 0; i < gnx1*gnx2; i++)
      if (contact[i] > 0) {
	contact[i] /= sum;
	total++;
      }

    index.write((char*) &total, sizeof(int) / sizeof(char));
    for (int i = 0; i < gnx1*gnx2; i++)
      if (contact[i] > 0.0) {
	index.write((char*) &i, sizeof(int) / sizeof(char));
	data.write((char*) &contact[i], sizeof(double) / sizeof(char));
      }

    // cout << frame << " " << total << endl;

  } // frame

  cout << endl << endl;

  index.close();
  data.close();

  // Clean coordinates
  for (vector<coord_array>::iterator itr = ref_coords->begin();
       itr != ref_coords->end(); itr++) delete [] (*itr);
  delete ref_coords;

  delete [] contact;
  delete [] weights;
  
  return 0;
}
Ejemplo n.º 27
0
int main(int argc,char* argv[])
{

	PlasmaData pdata(argc,argv);
	gnuplot_ctrl* plot;
	gnuplot_ctrl* plot_anim;


	plot = gnuplot_init();
	plot_anim = gnuplot_init();
	gnuplot_setstyle(plot,"lines");
	gnuplot_setstyle(plot_anim,"points");

	gnuplot_cmd(plot_anim,"set term gif animate nooptimize size 1280,1280 xffffffff");
	gnuplot_cmd(plot_anim,"set output \"particles.gif\"");

	gnuplot_cmd(plot_anim,"set xrange [-1:1]");
	gnuplot_cmd(plot_anim,"set yrange [-1:1]");

	float xmin = 0;
	float ymin = 0;
	float zmin = 0;

	float Lx = 5.0;
	float Ly = 5.0;
	float Lz = 5.0;

	int nx = 64;
	int ny = 64;
	int nz = 64;

	int nspecies = 1;

	const float dt = 0.01;

	const float dtau0 = 0.1;

	const int nptcls = 500;
	const int steps = 200;

	int iptcl[nptcls];

	float Ey = 5.0;
	float Bz = 100.0;




	pdata.nx = nx;
	pdata.ny = ny;
	pdata.nz = nz;

	pdata.Lx = Lx;
	pdata.Ly = Ly;
	pdata.Lz = Lz;

	pdata.xmin = xmin;
	pdata.ymin = ymin;
	pdata.zmin = zmin;
	pdata.epsilon_a = 1.0e-4;
	pdata.epsilon_r = 1.0e-10;

	pdata.dt = dt;

	pdata.niter_max = 20;

	pdata.nSubcycle_max = 1000;

	pdata.Bmag_avg = 1.0;
	pdata.ndimensions = 3;

	pdata.setup();

	FieldDataCPU fields;
	ParticleListCPU particles;
	HOMoments* moments;

	int numprocs = omp_get_num_procs();

	moments = (HOMoments*)malloc(numprocs*sizeof(HOMoments));

	for(int i=0;i<numprocs;i++)
	{
		moments[i] = *new HOMoments(&pdata);
	}
	float x_plot[nptcls][steps];
	float y_plot[nptcls][steps];
	float gx_plot[nptcls][steps];
	float gy_plot[nptcls][steps];

	float error_array[nptcls];


	//float x_plot_a[nptcls];
	//float y_plot_a[nptcls];


	fields.allocate(&pdata);
	particles.allocate(nptcls);

	fields.dx = pdata.dxdi;
	fields.dy = pdata.dydi;
	fields.dz = pdata.dzdi;

	particles.ispecies = 0;






	for(int i=0;i<nptcls;i++)
	{
		iptcl[i] = i;

		particles.px[i] = rand()%10000/10000.0;
		particles.py[i] = rand()%10000/10000.0;
		particles.pz[i] = 0.5;

		particles.ix[i] = nx/2;
		particles.iy[i] = ny/2;
		particles.iz[i] = nz/2;

		particles.vx[i] = 0.5*(2*(rand()%10000))/10000.0 + 0.5;
		particles.vy[i] = 0.5*(2*(rand()%10000))/10000.0 + 0.5;
		particles.vz[i] = 0.0* (rand()%50000 / 50000.0f - 0.5);

		error_array[i] = 0;


	}



	// Setup E-field
	for(int i=0;i<nx;i++)
	{
		for(int j=0;j<ny;j++)
		{
			for(int k=0;k<nz;k++)
			{
				float x = i*pdata.dxdi+xmin;
				float y = j*pdata.dydi+ymin;
				float z = k*pdata.dzdi+zmin;

				float Ex = -1.0*x;


				fields.getE(i,j,k,0) = 0;
				fields.getE(i,j,k,1) = Ey;
				fields.getE(i,j,k,2) = 0;

				fields.getB(i,j,k,0) = 0;
				fields.getB(i,j,k,1) = 0;
				fields.getB(i,j,k,2) = Bz;


			//	printf("fields(%i,%i,%i) = %f, %f, %f\n",i,j,k,
				//	fields.getE(i,j,k,0),fields.getE(i,j,k,1),fields.getE(i,j,k,2));
			}
		}
	}

	fields.q2m[0] = 1.0;

	printf("Efield setup complete\n");

	float time;
	double avg_error = 0.0;
	int n_error = 0;

	CPUTimer timer;


	moments->init_plot();

	timer.start();
	for(int i=0;i<steps;i++)
	{
		//time = dtau0*(i);


		//moments.set_vals(0);
		particles.push(&pdata,&fields,moments);
		printf("finished step %i\n",i);


		for(int j=0;j<nptcls;j++)
		{

			float px,py,gx,gy;
			float rl;
			float vx,vy,vxy,vz,vxyz;

			float vgx,vgy;
			float verror;

			px = (particles.px[j] + particles.ix[j])*pdata.dxdi + pdata.xmin;
			py = (particles.py[j] + particles.iy[j])*pdata.dydi + pdata.ymin;

			vx = particles.vx[j];
			vy = particles.vy[j];
			vz = particles.vz[j];
			vxy = sqrt(vx*vx+vy*vy);

			vxyz = sqrt(vxy*vxy + vz*vz);

			rl = vxy/Bz;

			gx = vy*Bz/sqrt(vx*Bz*vx*Bz + vy*Bz*vy*Bz)*rl + px;
			gy = -vx*Bz/sqrt(vx*Bz*vx*Bz + vy*Bz*vy*Bz)*rl + py;

			x_plot[j][i] = px;
			y_plot[j][i] = py;

			gx_plot[j][i] = gx;
			gy_plot[j][i] = gy;

			if(i >= 1)
			{
				vgx = (gx_plot[j][i] - gx_plot[j][0])/(dt*(i));
				vgy = (gy_plot[j][i] - gy_plot[j][0])/(dt*(i));

				verror = fabs(Ey/Bz - vgx)/(Ey/Bz);

				error_array[j] = fmax(error_array[j],verror);

				avg_error += verror;
				n_error ++;

			//	printf("true[%i] v = %e, %e actual v = %e, %e, error = %e\n",
			//			j,Ey/Bz,0.0f,vgx,vgy,verror);
			}

		}




		//if((i+1)%64 == 0)
		//gnuplot_resetplot(plot_anim);
/*
		float diff_avg = 0.0;
		for(int j=0;j<nptcls;j++)
		{

			x_plot[j][i] = (particles.px[j] + particles.ix[j])*pdata.dxdi + pdata.xmin;
			y_plot[j][i] = (particles.py[j] + particles.iy[j])*pdata.dydi + pdata.ymin;

			//printf("particle %i with position %f, %f\n",j,x_plot[j][i],y_plot[j][i]);

		//	x_plot_a[j] = x_plot[j][i];
		//	y_plot_a[j] = y_plot[j][i];

		}
*/

		//avg_error += diff_avg / steps;


		//gnuplot_plot_xy(plot_anim,x_plot_a,y_plot_a,nptcls,NULL);


	}
	timer.stop();
	printf("average error = %e \n",avg_error/((float)n_error));
	printf("Run did %f particles per second\n",nptcls*steps/(timer.diff()*1.0e-3));

	for(int j=0;j<nptcls;j++)
	{
		if(error_array[j] >= 1.0e-2)
			gnuplot_plot_xy(plot,x_plot[j],y_plot[j],steps,NULL);


	}


	//moments->plot(nz/2,0,HOMoments_currentx);


	printf("Press 'Enter' to continue\n");
		getchar();

	moments->close_plot();



	gnuplot_close(plot);

	gnuplot_close(plot_anim);

}
Ejemplo n.º 28
0
int main ()

/******************************************************************************/
/*
    Purpose:

        MAIN is the main program for MD_OPENMP.

    Discussion:

        MD implements a simple molecular dynamics simulation.

        The program uses Open MP directives to allow parallel computation.

        The velocity Verlet time integration scheme is used.

        The particles interact with a central pair potential.

        Output of the program is saved in the TNG format, which is why this
        code is included in the TNG API release. The high-level API of the
        TNG API is used where appropriate.

        The box shape, positions, velocities and forces are written to the
        trajectory at step 0 and then every STEP_SAVE steps.  The potential
        and kinetic energies are printed at step 0 and at STEP_PRINT_NUM - 1
        further points spread evenly over the run.

        The elapsed wall-clock time is kept in a double, the type returned
        by omp_get_wtime; storing the start timestamp in a float rounds it
        and makes the reported duration unreliable.

    Licensing:

        This code is distributed under the GNU LGPL license.

    Modified:

        8 Jan 2013

    Author:

        Original FORTRAN77 version by Bill Magro.
        C version by John Burkardt.
        TNG trajectory output by Magnus Lundborg.

    Parameters:

        None

    Return value:

        0 when the run reaches the end of the function.  The program calls
        exit(1) if memory cannot be allocated, if the molecule cannot be
        created, or if one of the TNG setup, first-frame or box-shape writes
        fails.  A failed position, velocity or force write inside the time
        loop only stops the loop early.
*/
{
    float *acc;
    float *box;
    float *box_shape;
    float dt = 0.0002;
    float e0;
    float *force;
    int i;
    float kinetic;
    float mass = 1.0;
    int nd = 3;
    int np = 50;
    float *pos;
    float potential;
    int proc_num;
    int seed = 123456789;
    int step;
    int step_num = 50000;
    int step_print;
    int step_print_index;
    int step_print_num;
    int step_save;
    float *vel;
    double wtime;
    tng_trajectory_t traj;
    tng_molecule_t molecule;
    tng_chain_t chain;
    tng_residue_t residue;
    tng_atom_t atom;

    timestamp ( );

    proc_num = omp_get_num_procs ( );

    acc = ( float * ) malloc ( nd * np * sizeof ( float ) );
    box = ( float * ) malloc ( nd * sizeof ( float ) );
    box_shape = (float *) malloc (9 * sizeof (float));
    force = ( float * ) malloc ( nd * np * sizeof ( float ) );
    pos = ( float * ) malloc ( nd * np * sizeof ( float ) );
    vel = ( float * ) malloc ( nd * np * sizeof ( float ) );
/*
    Every array is dereferenced below, so stop now if any allocation failed.
*/
    if ( !acc || !box || !box_shape || !force || !pos || !vel )
    {
        printf ( "  Cannot allocate memory for the simulation arrays.\n" );
        exit ( 1 );
    }

    printf ( "\n" );
    printf ( "MD_OPENMP\n" );
    printf ( "  C/OpenMP version\n" );
    printf ( "\n" );
    printf ( "  A molecular dynamics program.\n" );

    printf ( "\n" );
    printf ( "  NP, the number of particles in the simulation is %d\n", np );
    printf ( "  STEP_NUM, the number of time steps, is %d\n", step_num );
    printf ( "  DT, the size of each time step, is %f\n", dt );

    printf ( "\n" );
    printf ( "  Number of processors available = %d\n", proc_num );
    printf ( "  Number of threads =              %d\n", omp_get_max_threads ( ) );


    printf("\n");
    printf("  Initializing trajectory storage.\n");
    /* Initialize the TNG trajectory */
    tng_util_trajectory_open(TNG_EXAMPLE_FILES_DIR "tng_md_out.tng", 'w', &traj);



    /* Set molecules data */
    /* N.B. This is still not done using utility functions. The low-level API
     * is used. */
    printf("  Creating molecules in trajectory.\n");
    tng_molecule_add(traj, "water", &molecule);
    tng_molecule_chain_add(traj, molecule, "W", &chain);
    tng_chain_residue_add(traj, chain, "WAT", &residue);
    if(tng_residue_atom_add(traj, residue, "O", "O", &atom) == TNG_CRITICAL)
    {
        tng_util_trajectory_close(&traj);
        printf("  Cannot create molecules.\n");
        exit(1);
    }
    tng_molecule_cnt_set(traj, molecule, np);


/*
    Set the dimensions of the box.
*/
    for(i = 0; i < 9; i++)
    {
        box_shape[i] = 0.0;
    }
    for ( i = 0; i < nd; i++ )
    {
        box[i] = 10.0;
        /* box_shape stores 9 values according to the TNG specs */
        box_shape[i*nd + i] = box[i];
    }


    printf ( "\n" );
    printf ( "  Initializing positions, velocities, and accelerations.\n" );
/*
    Set initial positions, velocities, and accelerations.
*/
    initialize ( np, nd, box, &seed, pos, vel, acc );
/*
    Compute the forces and energies.
*/
    printf ( "\n" );
    printf ( "  Computing initial forces and energies.\n" );

    compute ( np, nd, pos, vel, mass, force, &potential, &kinetic );

/*
    E0 is the reference total energy for the relative error column.
*/
    e0 = potential + kinetic;

    /* Saving frequency */
    step_save = 400;

    step_print = 0;
    step_print_index = 0;
    step_print_num = 10;

/*
    This is the main time stepping loop:
        Compute forces and energies,
        Update positions, velocities, accelerations.
*/
    printf("  Every %d steps box shape, particle positions, velocities and forces are\n",
           step_save);
    printf("  saved to a TNG trajectory file.\n");
    printf ( "\n" );
    printf ( "  At certain step intervals, we report the potential and kinetic energies.\n" );
    printf ( "  The sum of these energies should be a constant.\n" );
    printf ( "  As an accuracy check, we also print the relative error\n" );
    printf ( "  in the total energy.\n" );
    printf ( "\n" );
    printf ( "      Step      Potential       Kinetic        (P+K-E0)/E0\n" );
    printf ( "                Energy P        Energy K       Relative Energy Error\n" );
    printf ( "\n" );

    step = 0;
    printf ( "  %8d  %14f  %14f  %14e\n",
        step, potential, kinetic, ( potential + kinetic - e0 ) / e0 );
    step_print_index++;
    step_print = ( step_print_index * step_num ) / step_print_num;

    /* Set the output frequency of box shape, positions, velocities and forces */
    if(tng_util_box_shape_write_frequency_set(traj, step_save) != TNG_SUCCESS)
    {
        printf("Error setting writing frequency data. %s: %d\n",
               __FILE__, __LINE__);
        exit(1);
    }
    if(tng_util_pos_write_frequency_set(traj, step_save) != TNG_SUCCESS)
    {
        printf("Error setting writing frequency data. %s: %d\n",
               __FILE__, __LINE__);
        exit(1);
    }
    if(tng_util_vel_write_frequency_set(traj, step_save) != TNG_SUCCESS)
    {
        printf("Error setting writing frequency data. %s: %d\n",
               __FILE__, __LINE__);
        exit(1);
    }
    if(tng_util_force_write_frequency_set(traj, step_save) != TNG_SUCCESS)
    {
        printf("Error setting writing frequency data. %s: %d\n",
               __FILE__, __LINE__);
        exit(1);
    }

    /* Write the first frame of box shape, positions, velocities and forces */
    if(tng_util_box_shape_write(traj, 0, box_shape) != TNG_SUCCESS)
    {
        printf("Error writing box shape. %s: %d\n",
               __FILE__, __LINE__);
        exit(1);
    }
    if(tng_util_pos_write(traj, 0, pos) != TNG_SUCCESS)
    {
        printf("Error adding data. %s: %d\n", __FILE__, __LINE__);
        exit(1);
    }
    if(tng_util_vel_write(traj, 0, vel) != TNG_SUCCESS)
    {
        printf("Error adding data. %s: %d\n", __FILE__, __LINE__);
        exit(1);
    }
    if(tng_util_force_write(traj, 0, force) != TNG_SUCCESS)
    {
        printf("Error adding data. %s: %d\n", __FILE__, __LINE__);
        exit(1);
    }

    /* Start of the timed region; kept in a double to preserve resolution. */
    wtime = omp_get_wtime ( );

    for ( step = 1; step < step_num; step++ )
    {
        compute ( np, nd, pos, vel, mass, force, &potential, &kinetic );

        if ( step == step_print )
        {
            printf ( "  %8d  %14f  %14f  %14e\n", step, potential, kinetic,
             ( potential + kinetic - e0 ) / e0 );
            step_print_index++;
            step_print = ( step_print_index * step_num ) / step_print_num;
        }
        if(step % step_save == 0)
        {
            /* Write box shape, positions, velocities and forces */
            if(tng_util_box_shape_write(traj, step, box_shape) != TNG_SUCCESS)
            {
                printf("Error writing box shape. %s: %d\n",
                       __FILE__, __LINE__);
                exit(1);
            }
            /* A failed data write abandons the time loop but still lets the
             * cleanup below run. */
            if(tng_util_pos_write(traj, step, pos) != TNG_SUCCESS)
            {
                printf("Error adding data. %s: %d\n", __FILE__, __LINE__);
                break;
            }
            if(tng_util_vel_write(traj, step, vel) != TNG_SUCCESS)
            {
                printf("Error adding data. %s: %d\n", __FILE__, __LINE__);
                break;
            }
            if(tng_util_force_write(traj, step, force) != TNG_SUCCESS)
            {
                printf("Error adding data. %s: %d\n", __FILE__, __LINE__);
                break;
            }
        }
        update ( np, nd, pos, vel, force, acc, mass, dt );
    }
    wtime = omp_get_wtime ( ) - wtime;

    printf ( "\n" );
    printf ( "  Elapsed time for main computation:\n" );
    printf ( "  %f seconds.\n", wtime );

    free ( acc );
    free ( box );
    free ( box_shape );
    free ( force );
    free ( pos );
    free ( vel );

    /* Close the TNG output. */
    tng_util_trajectory_close(&traj);

    printf ( "\n" );
    printf ( "MD_OPENMP\n" );
    printf ( "  Normal end of execution.\n" );

    printf ( "\n" );
    timestamp ( );

    return 0;
}
Ejemplo n.º 29
0
int main( int argc, char *argv[] )
{
//    try {

        time_t programStartTime(time(NULL));
        boost::filesystem::path workingDir( boost::filesystem::current_path());


        // ========== PROGRAM PARAMETERS ==========

        std::string progName("partitiontree");
        std::string configFilename("../../config/"+progName+".cfg");
        unsigned int threads(0), levelDepth(3), filterRadius(0);
        bool verbose(false), niftiMode( true );

        // program parameters
        std::string treeFilename, outputFolder;

        // Declare a group of options that will be allowed only on command line
        boost::program_options::options_description genericOptions("Generic options");
        genericOptions.add_options()
                ( "version", "Program version" )
                ( "help,h", "Produce extended program help message" )
                ( "tree,t",  boost::program_options::value< std::string >(&treeFilename), "file with the tree to compute partitions from")
                ( "outputf,O",  boost::program_options::value< std::string >(&outputFolder), "output folder where partition files will be written")
                ( "search-depth,d", boost::program_options::value< unsigned int >(&levelDepth)->implicit_value(3), "[opt] optimal partition search depth (default = 3)")
                ( "filter-radius,r", boost::program_options::value< unsigned int >(&filterRadius)->implicit_value(0), "[opt] output partition filter kernel radius (default = 0 | no filtering)")
                ( "hoz", "[opt] obtain horizontal cut partitions (instead of Spread-Separation ones)")
                ( "maxgran,m", "[opt] obtain only the maximum granularity partition")
                ;

        // Declare a group of options that will be allowed both on command line and in config file
        boost::program_options::options_description configOptions("Configuration");
        configOptions.add_options()
                ( "verbose,v", "[opt] verbose output." )
                ( "vista", "[opt] use vista file format (default is nifti)." )
                ( "pthreads,p",  boost::program_options::value< unsigned int >(&threads), "[opt] number of processing threads to run the program in parallel, default: all available")
                ;

        // Hidden options, will be allowed both on command line and in config file, but will not be shown to the user.
        boost::program_options::options_description hiddenOptions("Hidden options");
        //hiddenOptions.add_options() ;

        boost::program_options::options_description cmdlineOptions;
        cmdlineOptions.add(genericOptions).add(configOptions).add(hiddenOptions);
        boost::program_options::options_description configFileOptions;
        configFileOptions.add(configOptions).add(hiddenOptions);
        boost::program_options::options_description visibleOptions("Allowed options");
        visibleOptions.add(genericOptions).add(configOptions);
        boost::program_options::positional_options_description posOpt; //this arguments do not need to specify the option descriptor when typed in
        //posOpt.add("roi-file", -1);

        boost::program_options::variables_map variableMap;
        store(boost::program_options::command_line_parser(argc, argv).options(cmdlineOptions).positional(posOpt).run(), variableMap);

        std::ifstream ifs(configFilename.c_str());
        store(parse_config_file(ifs, configFileOptions), variableMap);
        notify(variableMap);


        if (variableMap.count("help"))
        {
            std::cout << "---------------------------------------------------------------------------" << std::endl;
            std::cout << std::endl;
            std::cout << " Project: hClustering" << std::endl;
            std::cout << std::endl;
            std::cout << " Whole-Brain Connectivity-Based Hierarchical Parcellation Project" << std::endl;
            std::cout << " David Moreno-Dominguez" << std::endl;
            std::cout << " [email protected]" << std::endl;
            std::cout << " [email protected]" << std::endl;
            std::cout << " www.cbs.mpg.de/~moreno" << std::endl;
            std::cout << std::endl;
            std::cout << " For more reference on the underlying algorithm and research they have been used for refer to:" << std::endl;
            std::cout << " - Moreno-Dominguez, D., Anwander, A., & Knösche, T. R. (2014)." << std::endl;
            std::cout << "   A hierarchical method for whole-brain connectivity-based parcellation." << std::endl;
            std::cout << "   Human Brain Mapping, 35(10), 5000-5025. doi: http://dx.doi.org/10.1002/hbm.22528" << std::endl;
            std::cout << " - Moreno-Dominguez, D. (2014)." << std::endl;
            std::cout << "   Whole-brain cortical parcellation: A hierarchical method based on dMRI tractography." << std::endl;
            std::cout << "   PhD Thesis, Max Planck Institute for Human Cognitive and Brain Sciences, Leipzig." << std::endl;
            std::cout << "   ISBN 978-3-941504-45-5" << std::endl;
            std::cout << std::endl;
            std::cout << " hClustering is free software: you can redistribute it and/or modify" << std::endl;
            std::cout << " it under the terms of the GNU Lesser General Public License as published by" << std::endl;
            std::cout << " the Free Software Foundation, either version 3 of the License, or" << std::endl;
            std::cout << " (at your option) any later version." << std::endl;
            std::cout << " http://creativecommons.org/licenses/by-nc/3.0" << std::endl;
            std::cout << std::endl;
            std::cout << " hClustering is distributed in the hope that it will be useful," << std::endl;
            std::cout << " but WITHOUT ANY WARRANTY; without even the implied warranty of" << std::endl;
            std::cout << " MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the" << std::endl;
            std::cout << " GNU Lesser General Public License for more details." << std::endl;
            std::cout << std::endl;
            std::cout << "---------------------------------------------------------------------------" << std::endl << std::endl;
            std::cout << "partitiontree" << std::endl << std::endl;
            std::cout << "Obtain tree partitions at all granularity levels using the Spread-Separation method (finding the the partition with highest SS index at each granularity)." << std::endl;
            std::cout << " Optimal SS value for each partition is searched within a defined search-depth hierarchical levels. Final partitions can be filtered with a defined kernel size." << std::endl;
            std::cout << " to keep local SS maxima within that kernel. For SS index refer to (Moreno-Dominguez, 2014)" << std::endl;
            std::cout << " For an interactive 3D partition management with more options please use the Hierarchcial Clustering module developed in OpenWalnut (www.openwalnut.org)." << std::endl << std::endl;
            std::cout << "* Arguments:" << std::endl << std::endl;
            std::cout << " --version:       Program version." << std::endl << std::endl;
            std::cout << " -h --help:       produce extended program help message." << std::endl << std::endl;
            std::cout << " -t --tree:       File with the hierarchical tree to extract partitions from." << std::endl << std::endl;
            std::cout << " -O --outputf:    Output folder where partition files will be written." << std::endl << std::endl;
            std::cout << "[-d --search-depth]:  Search optimal partition for each granularity within d hierarchical levels." << std::endl;
            std::cout << "                       A higher value will produce more optimized partition but will increase computing time." << std::endl;
            std::cout << "                       Default: 3. Recommendened values: 3 for good quality and fast computation, 4 for enhanced quality." << std::endl << std::endl;
            std::cout << "[-r --filter-radius]: Filter output partitions to keep only local SS (partition quality) maxima" << std::endl;
            std::cout << "                       within a r-sized kernel across the granularity dimension." << std::endl << std::endl;
            std::cout << "[-h --hoz]:       Write horizontal cut partitions instead of SS ones (optimal partition search is still based on SS index)." << std::endl << std::endl;
            std::cout << "[-m --maxgran]:   Compute and write only the maximum granularity (meta-leaves) partition." << std::endl << std::endl;
            std::cout << "[-v --verbose]:   verbose output (recommended)." << std::endl << std::endl;
            std::cout << "[--vista]: 	    write output tree in vista coordinates (default is nifti)." << std::endl << std::endl;
            std::cout << "[-p --pthreads]:  number of processing threads to run the program in parallel. Default: use all available processors." << std::endl << std::endl;
            std::cout << std::endl;
            std::cout << "* Usage example:" << std::endl << std::endl;
            std::cout << " partitiontree -t tree_lh.txt -O results/ -d 3 -r 50 -v" << std::endl << std::endl;
            std::cout << std::endl;
            std::cout << "* Outputs (in output folder defined at option -O):" << std::endl << std::endl;
            std::cout << " (default outputs)" << std::endl;
            std::cout << " - 'allSSparts_dX.txt' - (where X is the search depth level defined at parameter -d) Contains a summary of the partition information (cut value and size) for all granularities." << std::endl;
            std::cout << " - 'TREE_SSparts_dX.txt' - (where TREE is the filename of the input tree defined at parameter -t) contains a copy of the original tree file with the partitions at all granularities included in the relevant fields." << std::endl;
            std::cout << " - 'partitiontree_log.txt' - A text log file containing the parameter details and in-run and completion information of the program." << std::endl;
            std::cout << std::endl;
            std::cout << " (additional if using option -r)" << std::endl;
            std::cout << " - 'filtSSparts_dX_rY.txt' - (where Y is the filter radius defined at parameter -r) Contains a summary of the resulting filtered partitions." << std::endl;
            std::cout << " - 'TREE_SSparts_dX_rY.txt' - contains a copy of the original tree file with the resulting filtered partitions included in the relevant fields." << std::endl;
            std::cout << std::endl;
            std::cout << " (when using --hoz option, the prefix 'SS' will be replaced by 'Hoz'')" << std::endl;
            std::cout << std::endl;
            std::cout << " (alternative outputs when using option --maxgran)" << std::endl;
            std::cout << " - 'fmaxgranPart.txt' - Contains the size information of the resulting maximal granularity partition for that tree." << std::endl;
            std::cout << " - 'TREE_maxgranPart.txt' - contains a copy of the original tree file with the resulting max granularity partition included in the relevant fields." << std::endl;
            std::cout << std::endl;
            exit(0);
        }
        if (variableMap.count("version"))
        {
            std::cout << progName <<", version 2.0"<<std::endl;
            exit(0);
        }
        if (variableMap.count("verbose"))
        {
            std::cout << "verbose output"<<std::endl;
            verbose=true;
        }

        if (variableMap.count("pthreads"))
        {
            if (threads==1)
            {
                std::cout <<"Using a single processor"<< std::endl;
            }
            else if(threads==0 || threads>=omp_get_num_procs())
            {
                threads = omp_get_num_procs();
                std::cout <<"Using all available processors ("<< threads <<")." << std::endl;
            }
            else
            {
                std::cout <<"Using a maximum of "<< threads <<" processors "<< std::endl;
            }
            omp_set_num_threads( threads );
        }
        else
        {
            threads = omp_get_num_procs();
            omp_set_num_threads( threads );
            std::cout <<"Using all available processors ("<< threads <<")." << std::endl;
        }

        if ( variableMap.count( "vista" ) )
        {
            if( verbose )
            {
                std::cout << "Using vista format" << std::endl;
            }
            fileManagerFactory fmf;
            fmf.setVista();
            niftiMode = false;
        }
        else
        {
            if( verbose )
            {
                std::cout << "Using nifti format" << std::endl;
            }
            fileManagerFactory fmf;
            fmf.setNifti();
            niftiMode = true;
        }

        if (variableMap.count("tree"))
        {
            if(!boost::filesystem::is_regular_file(boost::filesystem::path(treeFilename)))
            {
                std::cerr << "ERROR: tree file \""<<treeFilename<<"\" is not a regular file"<<std::endl;
                std::cerr << visibleOptions << std::endl;
                exit(-1);
            }
            std::cout << "Roi voxels file: "<< treeFilename << std::endl;
        }
        else
        {
            std::cerr << "ERROR: no tree file stated"<<std::endl;
            std::cerr << visibleOptions << std::endl;
            exit(-1);
        }


        if (variableMap.count("outputf"))
        {
            if(!boost::filesystem::is_directory(boost::filesystem::path(outputFolder)))
            {
                std::cerr << "ERROR: output folder \""<<outputFolder<<"\" is not a directory"<<std::endl;
                std::cerr << visibleOptions << std::endl;
                exit(-1);

            }
            std::cout << "Output folder: "<< outputFolder << std::endl;
        }
        else
        {
            std::cerr << "ERROR: no output folder stated"<<std::endl;
            std::cerr << visibleOptions << std::endl;
            exit(-1);

        }



        if (variableMap.count("maxgran"))
        {
            std::cout<<"Obtaining only max. granularity partition..."<<std::endl;

            WHtree tree(treeFilename);
            std::cout<<tree.getReport( false )<<std::endl;
            if( tree.testRootBaseNodes() )
            {
                std::vector<size_t > maxpart( tree.getRootBaseNodes() );
                std::vector<std::vector<size_t > > partitionVector( 1, maxpart);
                std::vector<float > partitionValues(1,0);
                std::cout<<"maxgranpart size: "<<std::endl<<maxpart.size()<<std::endl;
                WHtreePartition partitioner(&tree);
                std::string outPartFilename( outputFolder + "/maxgranPart.txt" );
                partitioner.writePartitionSet( outPartFilename, partitionValues,partitionVector);
                tree.insertPartitions( partitionVector, partitionValues );
                std::string outTreeFilename( outputFolder + "/" + tree.getName() + "_maxgranPart" );
                outTreeFilename += ( ".txt" );
                tree.writeTree( outTreeFilename, niftiMode );
                return 0;
            }
            else
            {
                std::cout<<"ERROR: tree  does not have a maximum granularity meta-leaf partition"<<std::endl;
                return(-1);
            }
        }

        if( levelDepth > 5 )
        {
            std::cout << "Level depth indicated: " << levelDepth << " is too high, setting to a maximum of 5" << std::endl;
            levelDepth = 5;
        }
        std::cout << "Using a search depth of: " << levelDepth << std::endl;

        if( filterRadius > 1000 )
        {
            std::cout << "filter radius indicated: " << filterRadius << " is too high (max is 1000), setting to 100" << std::endl;
            filterRadius = 10;
        }
        if( filterRadius == 0 )
        {
            std::cout << "using no filtering (radius 0)" << std::endl;
        }
        else if( filterRadius < 0 )
        {
            std::cout << "filter radius indicated: " << filterRadius << " must be positive. using no filtering (radius 0)" << std::endl;
            filterRadius = 0;
        }
        else
        {
            std::cout << "Using a filter radius of: " << filterRadius << std::endl;
        }

        /////////////////////////////////////////////////////////////////



        std::string logFilename(outputFolder+"/"+progName+"_log.txt");
        std::ofstream logFile(logFilename.c_str());
        if(!logFile) {
            std::cerr << "ERROR: unable to open log file: \""<<logFilename<<"\""<<std::endl;
            exit(-1);
        }
        logFile <<"Start Time:\t"<< ctime(&programStartTime) <<std::endl;
        logFile <<"Working directory:\t"<< workingDir.string() <<std::endl;
        logFile <<"Verbose:\t"<< verbose <<std::endl;
        logFile <<"Tree file:\t"<< treeFilename <<std::endl;
        logFile <<"Output folder:\t"<< outputFolder <<std::endl;
        logFile <<"Verbose:\t"<< verbose <<std::endl;
        if( niftiMode )
        {
            logFile << "Using nifti file format" << std::endl;
        }
        else
        {
            logFile << "Using vista file format" << std::endl;
        }

        WHtree tree(treeFilename);

        logFile << tree.getReport( false ) <<std::endl;
        std::cout<<tree.getReport( false )<<std::endl;

        std::vector< float > partitionValues;
        std::vector< std::vector< size_t> > partitionVector;

        WHtreePartition treePartition(&tree);

        std::string prefix;

        if (variableMap.count("hoz"))
        {
            prefix = "Hoz";
            std::cout <<"getting hoz partitions at all levels..." <<std::endl;
            treePartition.scanHozPartitions( &partitionValues, &partitionVector );

            std::cout << partitionValues.size() << " Partitions obtained, writing to file..." <<std::endl;
            logFile <<"Initial partitions:\t"<< partitionValues.size() <<std::endl;
            std::string outPartFilename( outputFolder + "/all" + prefix + "parts.txt" );
            treePartition.writePartitionSet( outPartFilename, partitionValues, partitionVector);

            tree.insertPartitions( partitionVector, partitionValues );
            std::string outTreeFilename( outputFolder + "/" + tree.getName() + "_" + prefix + "parts_d" + boost::lexical_cast<std::string>(levelDepth) );
            outTreeFilename += ( ".txt" );
            tree.writeTree( outTreeFilename, niftiMode );
        }
        else
        {

            prefix = "SS";
            std::cout <<"getting SS partitions at all levels..." <<std::endl;
            treePartition.scanOptimalPartitions( levelDepth, &partitionValues, &partitionVector );

            std::cout << partitionValues.size() << " Partitions obtained, writing to file..." <<std::endl;
            logFile <<"Initial partitions:\t"<< partitionValues.size() <<std::endl;
            std::string outPartFilename( outputFolder + "/all" + prefix + "parts_d" + boost::lexical_cast<std::string>(levelDepth) + ".txt" );
            treePartition.writePartitionSet( outPartFilename, partitionValues, partitionVector);

            tree.insertPartitions( partitionVector, partitionValues );
            std::string outTreeFilename( outputFolder + "/" + tree.getName() + "_" + prefix + "parts_d" + boost::lexical_cast<std::string>(levelDepth) );
            outTreeFilename += ( ".txt" );
            tree.writeTree( outTreeFilename, niftiMode );

        }


        std::vector < unsigned int > filterRadii;
        //filterRadii.reserve( 6 );
        //        filterRadii.push_back( 1 );
        //        filterRadii.push_back( 2 );
        //        filterRadii.push_back( 5 );
        //        filterRadii.push_back( 10 );
        //        filterRadii.push_back( 15 );
        //        filterRadii.push_back( 20 );
        filterRadii.push_back( filterRadius );



        for(size_t i=0; i< filterRadii.size(); ++i)
        {
            if( filterRadii[i] <= 0 )
            {
                continue;
            }
            std::vector< float > filtPartValues( partitionValues );
            std::vector< std::vector< size_t> > filtPartVector( partitionVector );

            std::cout << "Filtering with a radius of "<< filterRadii[i] << "..." <<std::endl;
            treePartition.filterMaxPartitions( filterRadii[i], &filtPartValues, &filtPartVector );

            std::cout << filtPartValues.size() << " Filtered partitions obtained, writing to file..." <<std::endl;
            logFile <<"Filtered partitions:\t"<< filtPartValues.size() <<std::endl;
            std::string outPartFilename( outputFolder + "/filt" + prefix + "parts_d" + boost::lexical_cast<std::string>(levelDepth) );
            outPartFilename += ( "_r" + boost::lexical_cast<std::string>(filterRadii[i]) +  ".txt" );
            treePartition.writePartitionSet(outPartFilename, filtPartValues, filtPartVector);

            std::cout << "Adding filtered partitions to tree and writing..." <<std::endl;

            std::string outTreeFilename( outputFolder + "/" + tree.getName() + "_" + prefix + "parts_d" + boost::lexical_cast<std::string>(levelDepth) );
            outTreeFilename += ( "_r" + boost::lexical_cast<std::string>(filterRadii[i]) +  ".txt" );

            tree.insertPartitions( filtPartVector, filtPartValues );
            tree.writeTree( outTreeFilename, niftiMode );
        }



        /////////////////////////////////////////////////////////////////


        // save and print total time
        time_t programEndTime(time(NULL));
        int totalTime( difftime(programEndTime,programStartTime) );
        std::cout <<"Program Finished, total time: "<< totalTime/3600 <<"h "<<  (totalTime%3600)/60 <<"' "<< ((totalTime%3600)%60) <<"\"   "<< std::endl;
        logFile <<"-------------"<<std::endl;
        logFile <<"Finish Time:\t"<< ctime(&programEndTime) <<std::endl;
        logFile <<"Elapsed time : "<< totalTime/3600 <<"h "<<  (totalTime%3600)/60 <<"' "<< ((totalTime%3600)%60) <<"\""<< std::endl;


//    }
//    catch(std::exception& e)
//    {
//        std::cout << e.what() << std::endl;
//        return 1;
//    }
    return 0;
}
Ejemplo n.º 30
0
int main ( void )

/******************************************************************************/
/*
  Purpose:

    MAIN is the driver for the RANDOM_OPENMP example.

  Discussion:

    The example looks at a single difficulty of random number generation
    inside a parallel program.  A generator that advances by updating an
    integer seed cannot easily reproduce one exact sequence when the work
    is divided among threads.

    A further question is whether the OpenMP threads should treat the SEED
    value as shared or as private.  The approach suggested here is to give
    every thread its own private seed, each started from a distinct value
    before the computation begins.

    This routine prints a banner, reports the processor and thread counts
    given by OpenMP, calls MONTE_CARLO with N = 100 and SEED = 123456789,
    and then prints a closing message.

  Licensing:

    This code is distributed under the GNU LGPL license. 

  Modified:

    03 September 2012

  Author:

    John Burkardt
*/
{
  int sample_count = 100;
  int rng_seed = 123456789;
  int procs_available;
  int threads_max;

  timestamp ( );
/*
  Banner.
*/
  printf ( "\n" );
  printf ( "RANDOM_OPENMP\n" );
  printf ( "  C version\n" );
  printf ( "  An OpenMP program using random numbers.\n" );
  printf ( "  The random numbers depend on a seed.\n" );
  printf ( "  We need to insure that each OpenMP thread\n" );
  printf ( "  starts with a different seed.\n" );
  printf ( "\n" );
/*
  Report the parallel environment.
*/
  procs_available = omp_get_num_procs ( );
  printf ( "  Number of processors available = %d\n", procs_available );

  threads_max = omp_get_max_threads ( );
  printf ( "  Number of threads =              %d\n", threads_max );
/*
  Run the experiment; the seed is passed by address.
*/
  monte_carlo ( sample_count, &rng_seed );
/*
  Terminate.
*/
  printf ( "\n" );
  printf ( "RANDOM_OPENMP\n" );
  printf ( "  Normal end of execution.\n" );

  printf ( "\n" );
  timestamp ( );

  return 0;
}