Esempio n. 1
0
/**
 * Build the list of n-bit Gray code points by recursive mirroring.
 *
 * Algorithm:
 *   - n == 1: the list holds the two one-dimensional points 0 and 1.
 *   - n  > 1: take the (n-1)-bit list, create a mirror item for every
 *             existing item, prepend 0 to each original point and 1 to each
 *             mirror point, then append the mirror items in reversed order.
 *
 * @param n  number of bits; must be >= 1.
 * @return   the resulting list, or a null pointer when n < 1.
 *
 * NOTE(review): mirror items are spliced in directly through the `next`
 * links; if Points keeps any bookkeeping besides the item chain (e.g. an
 * element count), it is not updated here -- confirm against the Points
 * definition.
 */
Points* points_grayCode(int n) {
	Bool bit;
	Point seed;
	Points *codes;
	PointItem *item, *tail;
	PointItem *mirrorHead, *mirrorTail, *mirrorNew;

	/* n - 1 would never reach the n == 1 base case, so the recursion
	 * would not terminate; reject such input up front. */
	if (n < 1) {
		return 0;
	}

	if (n == 1) {
		/* Create the list only where it is used: previously it was created
		 * on every call and dropped unreferenced on the recursive path. */
		codes = points_init();

		bit = 0;
		seed.vect = &bit;
		seed.dim = 1;
		points_add(codes, seed);

		bit = 1;
		seed.vect = &bit;
		points_add(codes, seed);

		return codes;
	}

	codes = points_grayCode(n - 1);

	/* Remember the current tail before any mirror item exists, so the
	 * mirrored half can be attached after the original half. */
	tail = points_getLastPoint(codes);

	mirrorHead = 0;
	mirrorTail = 0;
	for (item = codes->point; item != 0; item = item->next) {
		/* The mirror item is built from the point before it gets its prefix. */
		mirrorNew = points_itemInit(item->p);
		if (mirrorTail != 0) {
			mirrorTail->next = mirrorNew;
		} else {
			mirrorHead = mirrorNew;
		}

		/* Original points get a leading 0, mirrored points a leading 1. */
		item->p = point_boolPrepend(item->p, 0);
		mirrorNew->p = point_boolPrepend(mirrorNew->p, 1);
		mirrorTail = mirrorNew;
	}

	/* The mirror chain was built in forward order; reverse it before
	 * appending so the second half is the reflection of the first. */
	tail->next = points_reverse(mirrorHead);
	return codes;
}
Esempio n. 2
0
int main(int argc, char *argv[]) {

  int np = 0;     // number of MPI processes
  int rank = 0;   // number assigned to this MPI process
  int restart_stop = 0; // boolean to determine when to stop restart loop

  // set up MPI
  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &np);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  time_t startwalltime = time(NULL);
  time_t timestepwalltime = time(NULL);
  time_t diffwalltime = difftime(timestepwalltime, startwalltime);

  if(rank == MASTER) {
    int turb = 0;
    MPI_Status status; // for MPI communication

    // parse command-line arguments
    // if none given, run normally
    // if -s given, run seeder program only
    // if anything else given, exit
    // this code comes from K&R, p. 117
    int lambflag = 0;
    int argin;
    int runseeder = 0;
    int runrestart = 0;
    while(--argc > 0 && (*++argv)[0] == '-') {
      while((argin = *++argv[0])) {
        switch(argin) {
          case 's':
            runseeder = 1;
            break;
          case 'r':
            runrestart = 1;
            break;
          default:
            runseeder = 2;
            runrestart = 2;
            printf("bluebottle: illegal option %c\n", argin);
            argc = 0;
            break;
        }
      }
    }
    
    if(runseeder == 1) {
      printf("Seed particles according to parameters specified in");
      printf(" parts.config? (y/N)\n");
      fflush(stdout);
      int c = getchar();
      if (c == 'Y' || c == 'y') {
        seeder_read_input();
        return EXIT_SUCCESS;
      } else {
        printf("Please specify the desired parameters in parts.config\n\n");
        fflush(stdout);
        return EXIT_FAILURE;
      }
    } else if(runrestart == 1 && argc > 0) {
      printf("Usage restart simulation: bluebottle -r\n");
      return EXIT_FAILURE;
    } else if(runseeder == 2) {
      return EXIT_FAILURE;
    } else if(runrestart == 2) {
      return EXIT_FAILURE;
    } else {
      // read recorder config file
      recorder_read_config();

      // read simulation input configuration file
      printf("\nRunning bluebottle_0.1...\n\n");
      printf("Reading the domain and particle input files...\n\n");
      domain_read_input();
      parts_read_input(turb);
	  points_read_input();
	  scalar_read_input();
      fflush(stdout);
      //printf("EXPD: Using devices %d through %d.\n\n", dev_start, dev_end);
      //fflush(stdout);

      /********* Messy hack for taking advantage of CUDA_VISIBLE_DEVICES
       ********* treatment in SLURM resource manager. */

      // read the environment variable
      char *cvdin;
      cvdin = getenv("CUDA_VISIBLE_DEVICES");
      if(cvdin != NULL) {
        // number of devices
        int n_CUDA_VISIBLE_DEVICES = 0.5*(strlen(cvdin)+1.);
        // list of devices
        int *CUDA_VISIBLE_DEVICES = malloc(n_CUDA_VISIBLE_DEVICES*sizeof(int));
        // fill list of devices assuming single-character separation
        int j = 0;
        for(int i = 0; i < 2*n_CUDA_VISIBLE_DEVICES-1; i+=2) {
          CUDA_VISIBLE_DEVICES[j] = cvdin[i] - '0';
          j++;
        }
        // use the first device available (devices are re-indexed by CUDA)
        if(n_CUDA_VISIBLE_DEVICES > 0) {
          dev_start = 0;
          dev_end = 0;
        } else { // exit if things aren't just right
          printf("Environment variable CUDA_VISIBLE_DEVICES is empty:\n");
          printf("  a. To use the config files to specify the device number,\n");
          printf("     type 'unset CUDA_VISIBLE_DEVICES'\n");
          printf("  b. To use CUDA_VISIBLE_DEVICES to specify the device number,\n");
          printf("     type 'export CUDA_VISIBLE_DEVICES=N1,N2,...',\n");
          printf("     where N1,N2 are comma-separated device numbers\n");
          exit(EXIT_FAILURE);
        }
      }

      /********* End messy CUDA_VISIBLE_DEVICES hack. */

      if(runrestart != 1) {
        // start BICGSTAB recorder
        recorder_bicgstab_init("solver_expd.rec");
        #ifdef IMPLICIT
          // start Helmholtz recorder
          recorder_bicgstab_init("solver_helmholtz_expd.rec");
        #endif
        // start Lamb's coefficient recorder
        // commented out because it should now automatically init itself
        // from recorder_lamb(...) if the file doesn't already exist
        //recorder_lamb_init("lamb.rec");
      }

      // initialize the domain
      printf("Initializing domain variables...");
      fflush(stdout);
      int domain_init_flag = domain_init();
      printf("done.\n");
      fflush(stdout);
      if(domain_init_flag == EXIT_FAILURE) {
        printf("\nThe number of devices in DEV RANGE is insufficient\n");
        printf("for the given domain decomposition.  Exiting now.\n");
        return EXIT_FAILURE;
      }

      // set up the boundary condition config info to send to precursor
      expd_init_BC(np);

      // initialize the particles
      printf("Initializing particle variables...");
      fflush(stdout);
      int parts_init_flag = parts_init();
      int binDom_init_flag = binDom_init();
      printf("done.\n");
      fflush(stdout);
      if(parts_init_flag == EXIT_FAILURE) {
        printf("\nThe initial particle configuration is not allowed.\n");
        return EXIT_FAILURE;
      } else if(binDom_init_flag == EXIT_FAILURE) {
        printf("\nThe bin configuration is not allowed.\n");
        return EXIT_FAILURE;
      }

      // initialize the point bubbles
        printf("Initializing point variables...");
        fflush(stdout);
		int points_init_flag = points_init();
		printf("done.\n");
     	fflush(stdout);
      	if(points_init_flag == EXIT_FAILURE) {
		printf("\nThe initial point_particle configuration is not allowed.\n");
		return EXIT_FAILURE;
      }

	// initialize the scalar 
      	printf("Initializing scalar variables...");
      	fflush(stdout);
      	int scalar_init_flag = scalar_init();
		printf("done.\n");
      	fflush(stdout);
      	if(scalar_init_flag == EXIT_FAILURE) {
		printf("\nThe initial scalar configuration is not allowed.\n");
		return EXIT_FAILURE;
      }

		rec_scalar_ttime_out = 0;
		rec_point_particle_ttime_out = 0;

      // allocate device memory
      printf("Allocating domain CUDA device memory...");
      fflush(stdout);
      cuda_dom_malloc();
      printf("...done.\n");
      fflush(stdout);
      printf("Allocating particle CUDA device memory...");
      fflush(stdout);
      cuda_part_malloc();
      printf("...done.\n");
      fflush(stdout);
	   printf("Allocating bubble CUDA device memory...");
      fflush(stdout);
      cuda_point_malloc();
      printf("...done.\n");
      fflush(stdout);
      printf("Allocating scalar CUDA device memory...");
      fflush(stdout);
      cuda_scalar_malloc();
      printf("...done.\n");
      fflush(stdout);


      // copy host data to devices
      printf("Copying host domain data to devices...");
      fflush(stdout);
      cuda_dom_push();
      printf("done.\n");
      fflush(stdout);
      printf("Copying host particle data to devices...");
      fflush(stdout);
      cuda_part_push();
      printf("done.\n");
      fflush(stdout);
	  printf("Copying host particle data to devices...");
      fflush(stdout);
      cuda_point_push();
      printf("done.\n");
      fflush(stdout);
      printf("Copying host scalar data to devices...");
      fflush(stdout);
      cuda_scalar_push();
      printf("done.\n");
      fflush(stdout);
      printf("Seting up initial bubble numbers...");
      npoints = ninit;
      printf("done.\n");
      fflush(stdout);


      count_mem();

      // initialize ParaView VTK output PVD file
      if(runrestart != 1) {
        #ifdef DEBUG
          init_VTK_ghost();
        #else
          if(rec_paraview_dt > 0) {
            init_VTK();
          }
        #endif
      }

      // set up particles
      cuda_build_cages();
      cuda_part_pull();

      // run restart if requested
      if(runrestart == 1) {
        printf("\nRestart requested.\n\n");
        printf("Reading restart file...");
        fflush(stdout);
        in_restart();
        printf("done.\n");
        fflush(stdout);
        printf("Copying host domain data to devices...");
        fflush(stdout);
        cuda_dom_push();
        printf("done.\n");
        fflush(stdout);
        printf("Copying host particle data to devices...");
        fflush(stdout);
        cuda_part_push();
        printf("done.\n");
        fflush(stdout);
        cgns_grid();
		printf("Copying host point data to devices...");
        fflush(stdout);
        cuda_point_push();
        printf("done.\n");
        fflush(stdout);
		printf("Copying host scalar data to devices...");
        fflush(stdout);
        cuda_scalar_push();
        printf("done.\n");
        fflush(stdout);

        if(ttime >= duration) {
          printf("\n...simulation completed.\n");
          restart_stop = 1;
        }
      }

      #ifdef DEBUG
        // write config to screen
        printf("\n=====DEBUG");
        printf("================================");
        printf("======================================\n");
        fflush(stdout);
        cuda_dom_pull();
        cuda_part_pull();
        domain_show_config();
        parts_show_config();
        bin_show_config();
        printf("========================================");
        printf("========================================\n\n");
        fflush(stdout);
      #endif

      #ifdef TEST // run test code
        // ** note that some of these work only for DEV RANGE 0 0 **
        // test CUDA functionality
        printf("\n=====TEST");
        printf("=================================");
        printf("======================================\n");
        fflush(stdout);
        dt = 1.;
        dt0 = -1.;
        cuda_compute_forcing(&pid_int, &pid_back, Kp, Ki, Kd);
        rec_flow_field_stepnum_out = -1;
        rec_paraview_stepnum_out = -1;
        rec_particle_stepnum_out = -1;
        //rec_restart_stepnum_out = -1;
        rec_prec_stepnum_out = -1;
        cuda_part_pull();
        //cuda_BC_test();
        //cuda_U_star_test_exp();
        //cuda_U_star_test_cos();
        //cuda_project_test();
        //cuda_quad_interp_test();
        cuda_lamb_test();
        printf("========================================");
        printf("========================================\n\n");
        fflush(stdout);

      #else // run simulation
        // begin simulation
        printf("\n=====BLUEBOTTLE");
        printf("===========================");
        printf("======================================\n");
        fflush(stdout);

        // get initial dt; this is an extra check for the SHEAR initialization
        dt = cuda_find_dt();
		dt_sc = cuda_find_dt_sc(dt);
		real dt_mp = Dom.dz / (2. / 9. * 9800. * rinit * rinit);
		dt_sc = (dt_sc > dt_mp) ? dt_mp : dt_sc; 
		dt = dt_sc;
		printf("Time Step is %f %f \n",dt, dt_sc);
		fflush(stdout);

        // share this with the precursor domain
        expd_compare_dt(np, status);

        // update the boundary condition config info to share with precursor
        expd_update_BC(np, status);

        // apply boundary conditions to field variables
        if(nparts > 0) {
          cuda_part_BC();
        }
		printf("Particle BC\n");
		fflush(stdout);
		compute_scalar_BC();
		printf("1 Scalar BC\n");
		fflush(stdout);
		cuda_scalar_BC();
		printf("2 Scalar BC\n");
		fflush(stdout);
        cuda_dom_BC();
		printf("1 Dom BC\n");
		fflush(stdout);
        // write particle internal flow equal to solid body velocity
        cuda_parts_internal();
		printf("Internal Particle BC\n");
		fflush(stdout);
        cuda_dom_BC();
		printf("2 Dom BC\n");
		fflush(stdout);

        // write initial fields
        if(runrestart != 1) {
          cuda_dom_pull();
		printf("Dom Pull\n");
		fflush(stdout);
          cuda_part_pull();
		printf("Part Pull\n");
		fflush(stdout);
		  cuda_scalar_pull();
		printf("Scalar Pull\n");
		fflush(stdout);
		  cuda_point_pull();
		printf("Point Pull\n");
		fflush(stdout);

        #ifdef DEBUG
            printf("Writing ParaView file %d (t = %e)...",
            rec_paraview_stepnum_out, ttime);
            fflush(stdout);
            out_VTK_ghost();
            rec_paraview_stepnum_out++;
            printf("done.               \n");
            fflush(stdout);
        #else
          if(rec_flow_field_dt > 0) {
            printf("Writing flow field file t = %e...", ttime);
            fflush(stdout);
            cgns_grid();
            cgns_flow_field(rec_flow_field_dt);
            rec_flow_field_stepnum_out++;
            printf("done.               \n");
            fflush(stdout);
          }
          if(rec_particle_dt > 0) {
            printf("Writing particle file t = %e...", ttime);
            fflush(stdout);
            cgns_particles(rec_particle_dt);
            recorder_lamb("lamb.rec", 0);
            rec_particle_stepnum_out++;
            printf("done.               \n");
            fflush(stdout);
          }
		 if(rec_point_particle_dt > 0) {
            printf("Writing point particle file t = %e...", ttime);
            fflush(stdout);
            cgns_point_particles(rec_point_particle_dt);
            rec_point_particle_stepnum_out++;
            printf("done.               \n");
            fflush(stdout);
          }
          if(rec_scalar_field_dt > 0) {
            printf("Writing point particle file t = %e...", ttime);
            fflush(stdout);
            cgns_scalar_field(rec_scalar_field_dt);
            rec_scalar_stepnum_out++;
            printf("done.               \n");
            fflush(stdout);
          }
          if(rec_paraview_dt > 0) {
            printf("Writing ParaView file %d (t = %e)...",
              rec_paraview_stepnum_out, ttime);
            fflush(stdout);
            out_VTK();
            rec_paraview_stepnum_out++;
            printf("done.               \n");
            fflush(stdout);
          }

        #endif
        }

        /******************************************************************/
        /** Begin the main timestepping loop in the experimental domain. **/
        /******************************************************************/
        while(ttime <= duration) {
          ttime += dt;
          rec_flow_field_ttime_out += dt;
          rec_paraview_ttime_out += dt;
          rec_particle_ttime_out += dt;
          rec_restart_ttime_out += dt;
		  rec_point_particle_ttime_out += dt;
		  rec_scalar_ttime_out += dt;
          stepnum++;
          printf("EXPD: Time = %e of %e (dt = %e).\n", ttime, duration, dt);
          fflush(stdout);

          cuda_compute_forcing(&pid_int, &pid_back, Kp, Ki, Kd);
	  printf("Compute forcing\n");
	  fflush(stdout);
	  if(npoints > 0 && lpt_twoway > 0)
				  lpt_point_twoway_forcing();
	  printf("Two_way forcing\n");
	  fflush(stdout);
          compute_vel_BC();
	  printf("Compute Vel BC\n");
	  fflush(stdout);
          // update the boundary condition config info and share with precursor
          expd_update_BC(np, status);

          // TODO: save work by rebuilding only the cages that need to be rebuilt
          cuda_build_cages();

          int iter = 0;
          real iter_err = FLT_MAX;

          while(iter_err > lamb_residual) {  // iterate for Lamb's coefficients
            #ifndef BATCHRUN
              printf("  Iteration %d: ", iter);
              fflush(stdout);
            #endif

            // solve for U_star
            #ifndef IMPLICIT
              cuda_U_star_2();
            #else
              cuda_ustar_helmholtz(rank);
              cuda_vstar_helmholtz(rank);
              cuda_wstar_helmholtz(rank);
            #endif

            // apply boundary conditions to U_star
            if(nparts > 0) {
              cuda_part_BC_star();
            }
            cuda_dom_BC_star();
            // enforce solvability condition
            cuda_solvability();
            if(nparts > 0) {
              cuda_part_BC_star();
            }
            cuda_dom_BC_star();
            // solve for pressure
            cuda_PP_bicgstab(rank);
            cuda_dom_BC_phi();
            // solve for U
            cuda_project();
            // apply boundary conditions to field variables
            if(nparts > 0) {
              cuda_part_BC();
            }
            cuda_dom_BC();
            // update pressure
            cuda_update_p();
            if(nparts > 0) {
              cuda_part_BC();
              cuda_part_p_fill();
            }
            cuda_dom_BC_p();

            // update Lamb's coefficients
            cuda_move_parts_sub();
            cuda_Lamb();

            #ifdef STEPS // force no sub-timestep iteration
              iter_err = -1;
            #else
              // check error between this set of coefficients and previous set
              // of coefficients
              iter_err = cuda_lamb_err();
              // TODO: write error to lamb.rec
            #endif
            #ifndef BATCHRUN
              printf("Error = %f\r", iter_err);
            #endif
            iter++;
            // check iteration limit
            if(iter == lamb_max_iter) {
              //lambflag = !lambflag;
              //printf("Reached the maximum number of Lamb's");
              //printf(" coefficient iterations.");
              //printf(" CONTINUING simulation.\n");
              break;
            }
          }

          printf("  The Lamb's coefficients converged in");
          printf(" %d iterations.\n", iter);
	  fflush(stdout);

          if(!lambflag) {
            // update particle position
            cuda_move_parts();

            // write particle internal flow equal to solid body velocity
            cuda_parts_internal();
            cuda_dom_BC();

            // store u, conv, and coeffs for use in next timestep
            cuda_store_u();
            if(nparts > 0)
              cuda_store_coeffs();

            // compute div(U)
            //cuda_div_U();
		/* Use to move point bubbles and update concentration field */
			if(npoints > 0)
		   	 cuda_flow_stress();
			bubble_generate();
		//	points_show_config();
	    	cuda_find_DIFF_dt_points();
	    	//move bubbles and update scalar fields
	    	if(npoints > 0)
		    cuda_move_points();
	    	compute_scalar_BC();
			printf("Compute Scalar BC\n");
			fflush(stdout);
	    	cuda_scalar_BC();
			printf("Apply Scalar BC\n");
			fflush(stdout);
	    	cuda_scalar_advance();
	    	printf("Scalar Advance\n");
			fflush(stdout);
	    	cuda_store_scalar();
	    	printf("Scalar Store\n");
			fflush(stdout);

      	// compute next timestep size
       		dt0 = dt;
       		dt = cuda_find_dt();
	   		dt_sc = cuda_find_dt_sc(dt);
	    	dt_sc = cuda_find_dt_points(dt_sc);
	    	dt = dt_sc;
		
            // compare this timestep size to that in the precursor and
            // and synchronize the result
            expd_compare_dt(np, status);

          } else {
            return EXIT_FAILURE;
          }

          if(rec_flow_field_dt > 0) {
            if(rec_flow_field_ttime_out >= rec_flow_field_dt) {
              // pull back data and write fields
              cuda_dom_pull();
              cuda_part_pull();
              #ifndef BATCHRUN
                printf("  Writing flow field file t = %e...                  \r",
                  ttime);
                fflush(stdout);
              #endif
              cgns_flow_field(rec_flow_field_dt);
              printf("  Writing flow field file t = %e...done.\n", ttime);
              fflush(stdout);
              rec_flow_field_ttime_out = rec_flow_field_ttime_out
                - rec_flow_field_dt;
              rec_flow_field_stepnum_out++;
            }
          }
          if(rec_paraview_dt > 0) {
            if(rec_paraview_ttime_out >= rec_paraview_dt) {
              // pull back data and write fields
              cuda_dom_pull();
              cuda_part_pull();
              #ifndef BATCHRUN
                printf("  Writing ParaView output file");
                printf(" %d (t = %e)...                  \r",
                  rec_paraview_stepnum_out, ttime);
                fflush(stdout);
              #endif
              #ifdef DEBUG
                out_VTK_ghost();
              #else
                out_VTK();
              #endif
              printf("  Writing ParaView file %d (t = %e)...done.\n",
                rec_paraview_stepnum_out, ttime);
              rec_paraview_stepnum_out++;
              fflush(stdout);
              rec_paraview_ttime_out = rec_paraview_ttime_out - rec_paraview_dt;
            }
          }
          if(rec_particle_dt > 0) {
            if(rec_particle_ttime_out >= rec_particle_dt) {
              // pull back data and write fields
              cuda_part_pull();
              #ifndef BATCHRUN
                printf("  Writing particle file t = %e...                  \r",
                  ttime);
                fflush(stdout);
              #endif
              #ifdef DEBUG
                recorder_lamb("lamb.rec", iter);
              #else
                cgns_particles(rec_particle_dt);
                recorder_lamb("lamb.rec", iter);
              #endif
              printf("  Writing particle file t = %e...done.\n", ttime);
              fflush(stdout);
              rec_particle_ttime_out = rec_particle_ttime_out - rec_particle_dt;
              rec_particle_stepnum_out++;
            }
          }
			if(rec_point_particle_dt > 0) {
            if(rec_point_particle_ttime_out >= rec_point_particle_dt) {
              // pull back data and write fields
              cuda_point_pull();
              #ifndef BATCHRUN
                printf("  Writing bubble file t = %e...\n", ttime);
                fflush(stdout);
              #endif
                cgns_point_particles(rec_point_particle_dt);
              printf("  Writing bubble file t = %e...done.\n", ttime);
              fflush(stdout);
              rec_point_particle_ttime_out = rec_point_particle_ttime_out - rec_point_particle_dt;
              rec_point_particle_stepnum_out++;
            }
          }
			if(rec_scalar_field_dt > 0) {
	    	if(rec_scalar_ttime_out >= rec_scalar_field_dt) {
	      // pull back data and write fields
	      	cuda_scalar_pull();
	      	#ifndef BATCHRUN
			printf("  Writing scalar file t = %e...done.\n", ttime);
			fflush(stdout);
	      	#endif               
	      	cgns_scalar_field(rec_scalar_field_dt);
			printf("  Writing scalar file t = %e...done.\n", ttime);
			fflush(stdout);
	      	rec_scalar_ttime_out = rec_scalar_ttime_out - rec_scalar_field_dt;
	      	rec_scalar_stepnum_out++;
	    }
	  }


          // write a restart file and exit when the time is appropriate
          timestepwalltime = time(NULL);
          diffwalltime = difftime(timestepwalltime, startwalltime);
          int rest_com = (rec_restart_dt > 0)
            && ((real)diffwalltime/60. > rec_restart_dt);

          // communicate write restart with precursor domain
          expd_comm_restart_write(np, rest_com);

          if(rest_com) {
            printf("  Writing restart file (t = %e)...", ttime);
            fflush(stdout);
            cuda_dom_pull();
            cuda_part_pull();
            out_restart();
            printf("done.               \n");
            fflush(stdout);
            rec_restart_ttime_out = rec_restart_ttime_out - rec_restart_dt;
            startwalltime = time(NULL);
            if(rec_restart_stop)
              break; // exit!
          }

          // check for blow-up condition
          if(dt < 1e-20) {
            printf("The solution has diverged.  Ending simulation.              \n");
            return EXIT_FAILURE;
          }
        }

        if(rec_restart_dt > 0 && ttime >= duration && !restart_stop) {
          printf("  Writing final restart file (t = %e)...", ttime);
          fflush(stdout);
          cuda_dom_pull();
          cuda_part_pull();
          out_restart();
          printf("done.               \n");
          fflush(stdout);
          rec_restart_ttime_out = rec_restart_ttime_out - rec_restart_dt;
          startwalltime = time(NULL);
        }

        printf("========================================");
        printf("========================================\n\n");
        fflush(stdout);
      #endif

      // clean up devices
      printf("Cleaning up domain data on devices...");
      fflush(stdout);
      cuda_dom_free();
      printf("done.     \n");
      fflush(stdout);
      printf("Cleaning up particle data on devices...");
      fflush(stdout);
      cuda_part_free();
      printf("done.\n");
      fflush(stdout);
	  printf("Cleaning up bubble data on devices...");
      fflush(stdout);
      cuda_point_free();
      printf("done.\n");
      fflush(stdout);
      printf("Cleaning up scalar data on devices...");
      fflush(stdout);
      cuda_scalar_free();
      printf("done.\n");
      fflush(stdout);


      // clean up host
      printf("Cleaning up particles...");
      fflush(stdout);
      parts_clean();
      printf("done.\n");
      fflush(stdout);
      printf("Cleaning up domain...");
      fflush(stdout);
      domain_clean();
      printf("done.\n");
      fflush(stdout);
	  printf("Cleaning up point particles...");
      fflush(stdout);
      points_clean();
      printf("done.\n");
      fflush(stdout);
      printf("Cleaning up scalar...");
      fflush(stdout);
      scalar_clean();
      printf("done.\n");
      fflush(stdout);


      printf("\n...bluebottle_0.1 done.\n\n");
    }
  } else {
    int turb = 1;   // boolean
    MPI_Status status; // for MPI communication

    // parse command-line arguments
    // if none given, run normally
    // if -s given, run seeder program only
    // if anything else given, exit
    // this code comes from K&R, p. 117
    int argin;
    int runrestart = 0;
    while(--argc > 0 && (*++argv)[0] == '-') {
      while((argin = *++argv[0])) {
        switch(argin) {
          case 'r':
            runrestart = 1;
            break;
          default:
            runrestart = 2;
            printf("bluebottle: illegal option %c\n", argin);
            argc = 0;
            break;
        }
      }
    }

    // read simulation input configuration file
    recorder_read_config();
    turb_read_input();
    parts_read_input(turb);

    //printf("PREC: Using devices %d through %d.\n\n", dev_start, dev_end);
    //fflush(stdout);

    /********* Messy hack for taking advantage of CUDA_VISIBLE_DEVICES
     ********* treatment in SLURM resource manager. */

    // read th environment variable
    char *cvdin;
    cvdin = getenv("CUDA_VISIBLE_DEVICES");
    if(cvdin != NULL) {
      // number of devices
      int n_CUDA_VISIBLE_DEVICES = 0.5*(strlen(cvdin)+1.);
      // list of devices
      int *CUDA_VISIBLE_DEVICES = malloc(n_CUDA_VISIBLE_DEVICES*sizeof(int));
      // fill list of devices assuming single-character separation
      int j = 0;
      for(int i = 0; i < 2*n_CUDA_VISIBLE_DEVICES-1; i+=2) {
        CUDA_VISIBLE_DEVICES[j] = cvdin[i] - '0';
        j++;
      }
      // use the second device available (devices are re-indexed by CUDA)
      if(n_CUDA_VISIBLE_DEVICES > 1) {
        dev_start = 1;
        dev_end = 1;
      // if only one device is available, try to use it
      } else if(n_CUDA_VISIBLE_DEVICES > 0) {
        dev_start = 0;
        dev_end = 0;
      } else { // exit if things aren't just right
        printf("Environment variable CUDA_VISIBLE_DEVICES is empty:\n");
        printf("  a. To use the config files to specify the device number,\n");
        printf("     type 'unset CUDA_VISIBLE_DEVICES'\n");
        printf("  b. To use CUDA_VISIBLE_DEVICES to specify the device number,\n");
        printf("     type 'export CUDA_VISIBLE_DEVICES=N1,N2,...',\n");
        printf("     where N1,N2 are comma-separated device numbers\n");
        exit(EXIT_FAILURE);
      }
    }

    /********* End messy CUDA_VISIBLE_DEVICES hack. */

    if(runrestart != 1) {
      // start BICGSTAB recorder
      recorder_bicgstab_init("solver_prec.rec");
      #ifdef IMPLICIT
        // start Helmholtz recorder
        recorder_bicgstab_init("solver_helmholtz_prec.rec");
      #endif
    }

    // initialize the domain
    int domain_init_flag = domain_init_turb();
    if(domain_init_flag == EXIT_FAILURE) {
      printf("\nThe number of devices in DEV RANGE is insufficient\n");
      printf("for the given turbulence domain decomposition.  Exiting now.\n");
      return EXIT_FAILURE;
    }

    // receive the boundary condition config info from MASTER
    prec_init_BC(np, rank, status);

    // initialize the particles
    int parts_init_flag = parts_init();
    int binDom_init_flag = binDom_init();
    if(parts_init_flag == EXIT_FAILURE) {
      printf("\nThe initial particle configuration is not allowed.\n");
      return EXIT_FAILURE;
    } else if(binDom_init_flag == EXIT_FAILURE) {
      printf("\nThe bin configuration is not allowed.\n");
      return EXIT_FAILURE;
    }

    // allocate device memory
    cuda_dom_malloc();
    cuda_part_malloc();

    // copy host data to devices
    cuda_dom_push();
    cuda_dom_turb_planes_push(bc_flow_configs);
    cuda_part_push();

    //count_mem();

    // initialize ParaView VTK output PVD file
    if(rec_prec_dt > 0) {
      init_VTK_turb();
    }

    real rec_prec_ttime_out = 0.;
    real rec_restart_ttime_out = 0.;

    cuda_build_cages();
    cuda_part_pull();

    // run restart if requested
    if(runrestart == 1) {
      printf("\nRestart requested.\n\n");
      printf("Reading restart file...");
      fflush(stdout);
      cgns_turb_grid();
      in_restart_turb();
      printf("done.\n");
      fflush(stdout);
      printf("Copying host domain data to devices...");
      fflush(stdout);
      cuda_dom_push();
      printf("done.\n");
      fflush(stdout);
      printf("Copying host particle data to devices...");
      fflush(stdout);
      cuda_part_push();
      printf("done.\n");
      fflush(stdout);
      cgns_grid();
    }

    // initialize timestep size since turbulent velocity is nonzero
    dt = cuda_find_dt();

    // share this dt with the experimental domain
    prec_compare_dt(np, rank, status);

    // update the boundary conditions according to the experimental domain
    prec_update_BC(np, rank, status);

    // begin simulation
    // apply boundary conditions to field variables
    cuda_dom_BC();

    // write initial fields
    if(rec_prec_dt > 0 && runrestart != 1) {
      cuda_dom_pull();
      printf("Writing precursor file %d (t = %e)...",
        rec_prec_stepnum_out, ttime);
      fflush(stdout);
      out_VTK_turb();
      rec_prec_stepnum_out++;
      printf("done.               \n");
      fflush(stdout);
    }
    if(rec_prec_flow_field_dt > 0 && runrestart != 1) {
      cuda_dom_pull();
      printf("Writing turbulence flow field file t = %e...", ttime);
      fflush(stdout);
      cgns_turb_grid();
      cgns_turb_flow_field(rec_prec_flow_field_dt);
      rec_prec_flow_field_stepnum_out++;
      printf("done.               \n");
      fflush(stdout);
    }
    /***************************************************************/
    /** Begin the main timestepping loop in the precursor domain. **/
    /***************************************************************/
    while(ttime <= duration) {
      ttime += dt;
      rec_prec_flow_field_ttime_out += dt;
      rec_prec_ttime_out += dt;
      rec_restart_ttime_out += dt;
      stepnum++;
      printf("PREC: Time = %e of %e (dt = %e).\n", ttime, duration, dt);

      cuda_compute_forcing(&pid_int, &pid_back, Kp, Ki, Kd);
      cuda_compute_turb_forcing();
      compute_vel_BC();

      // solve for U_star
      #ifndef IMPLICIT
        cuda_U_star_2();
      #else
        cuda_ustar_helmholtz(rank);
        cuda_vstar_helmholtz(rank);
        cuda_wstar_helmholtz(rank);
      #endif
      // apply boundary conditions to U_star
      cuda_dom_BC_star();
      // force solvability condition
      cuda_solvability();
      cuda_dom_BC_star();
      // solve for pressure
      cuda_PP_bicgstab(rank);
      cuda_dom_BC_phi();
      // solve for U
      cuda_project();
      // apply boundary conditions to field variables
      cuda_dom_BC();
      // update pressure
      cuda_update_p();
      cuda_dom_BC_p();

      cuda_store_u();

      // compute next timestep size
      dt0 = dt;
      dt = cuda_find_dt();
     
      // check for blow-up condition
      if(dt < 1e-20) {
        printf("The solution has diverged.  Ending simulation.              \n");
        return EXIT_FAILURE;
      }

      // communicate the boundary condition with the experimental domain
      prec_send_BC(np, rank, status);

      if(rec_prec_dt > 0) {
        if(rec_prec_ttime_out >= rec_prec_dt) {
          // pull back data and write fields
          cuda_dom_pull();
          #ifndef BATCHRUN
            printf("  Writing precursor output file");
            printf(" %d (t = %e)...                  \r",
              rec_prec_stepnum_out, ttime);
            fflush(stdout);
          #endif
          #ifdef DEBUG
            out_VTK_ghost();
          #else
            out_VTK_turb();
          #endif
          printf("  Writing precursor file %d (t = %e)...done.\n",
            rec_prec_stepnum_out, ttime);
          rec_prec_stepnum_out++;
          fflush(stdout);
          rec_prec_ttime_out = rec_prec_ttime_out - rec_prec_dt;
        }
      }
      if(rec_prec_flow_field_dt > 0) {
       if(rec_prec_flow_field_ttime_out >= rec_prec_flow_field_dt) {
          // pull back data and write fields
          cuda_dom_pull();
          cgns_turb_flow_field(rec_prec_flow_field_dt);
          printf("  Writing precursor flow field file t = %e...done.\n",
            ttime);
          fflush(stdout);
          rec_prec_flow_field_ttime_out = rec_prec_flow_field_ttime_out
            - rec_prec_flow_field_dt;
          rec_prec_flow_field_stepnum_out++;
        }
      }
      // write a restart file and exit when the time is appropriate
      int rest_com;
      prec_comm_restart_write(np, &rest_com, rank, status);
      if(rest_com) {
        printf("  Writing precursor restart file (t = %e)...", ttime);
        fflush(stdout);
        cuda_dom_pull();
        out_restart_turb();
        printf("done.               \n");
        fflush(stdout);
        rec_restart_ttime_out = rec_restart_ttime_out - rec_restart_dt;
        startwalltime = time(NULL);
        if(rec_restart_stop)
          break; // exit!
      }
    }

    if(rec_restart_dt > 0 && ttime >= duration && !restart_stop) {
      printf("  Writing final precursor restart file (t = %e)...", ttime);
      fflush(stdout);
      cuda_dom_pull();
      out_restart_turb();
      printf("done.               \n");
      fflush(stdout);
      rec_restart_ttime_out = rec_restart_ttime_out - rec_restart_dt;
      startwalltime = time(NULL);
    }

    // clean up devices
    cuda_dom_free();
    cuda_part_free();

    // clean up host
    parts_clean();
    domain_clean();
  }

  // finalize MPI
  MPI_Finalize();

  if(restart_stop) return EXIT_FAILURE;
  else return EXIT_SUCCESS;
}
Esempio n. 3
0
/*
 * Discard the current point particles and scalar field, then read their
 * input again and inject fresh ones into the flow field (the original note
 * names point.config and scalar.config as the sources).
 *
 * The simulation time is reset to zero and the initial output records are
 * written for every output stream whose interval is positive.
 *
 * Returns EXIT_FAILURE when points_init() or scalar_init() reports failure,
 * EXIT_SUCCESS otherwise.
 */
int points_scalar_inject(void)
{
  /* Release the previous points, then the previous scalar (device, then host). */
  cuda_point_free();
  points_clean();
  cuda_scalar_free();
  scalar_clean();

  /* Read the point input and rebuild the points. */
  points_read_input();
  const int points_status = points_init();
  fflush(stdout);
  if(points_status == EXIT_FAILURE) {
    printf("\nThe initial point_particle configuration is not allowed.\n");
    return EXIT_FAILURE;
  }

  /* Read the scalar input and rebuild the scalar. */
  scalar_read_input();
  const int scalar_status = scalar_init();
  fflush(stdout);
  if(scalar_status == EXIT_FAILURE) {
    printf("\nThe initial scalar configuration is not allowed.\n");
    return EXIT_FAILURE;
  }

  /* Allocate device storage and upload the host data: scalar first, then points. */
  cuda_scalar_malloc();
  cuda_scalar_push();

  cuda_point_malloc();
  cuda_point_push();

  if(npoints > 0) {
    cuda_flow_stress();
  }

  /*
   * The domain velocity has already been pushed to the device; match the
   * device point velocity with the flow field based on the point positions
   * copied from the host, then bring the updated points back to the host.
   */
  match_point_vel_with_flow();
  cuda_point_pull();

  /* Restart the clock for the newly injected points and scalar. */
  ttime = 0.f;

  /* Write the initial fields. */
  cuda_dom_pull();

  if(rec_flow_field_dt > 0) {
    cgns_grid();
    cgns_flow_field(rec_flow_field_dt);
    rec_flow_field_stepnum_out++;
  }

  if(rec_point_particle_dt > 0) {
    cgns_point_particles(rec_point_particle_dt);
    rec_point_particle_stepnum_out++;
  }

  if(rec_scalar_field_dt > 0) {
    cgns_scalar_field(rec_scalar_field_dt);
    rec_scalar_stepnum_out++;
  }

  return EXIT_SUCCESS;
}
Esempio n. 4
0
/**
 * Open the output stream named by a delimited filename token.
 *
 * The first and last characters of `quoted` are dropped before the name is
 * handed to file_fopenOutput() (presumably the surrounding quotes left by
 * the parser -- confirm against the grammar).
 *
 * Returns the stream from file_fopenOutput(), or 0 when the token is too
 * short to carry both delimiters, when the temporary buffer cannot be
 * allocated, or when file_fopenOutput() itself returns 0.
 *
 * NOTE(review): the temporary path is freed right after the call; this
 * assumes file_fopenOutput() does not keep the pointer.
 */
static FILE* interp_openQuotedOutput(const char* quoted) {
	size_t len = strlen(quoted);
	char* path;
	FILE* file;

	/* Fewer than two characters: there is nothing between the delimiters. */
	if (len < 2)
		return 0;

	/* len - 2 payload characters plus the terminator; calloc zero-fills it. */
	path = calloc(len - 1, sizeof(*path));
	if (path == 0)
		return 0;
	memcpy(path, quoted + 1, len - 2);

	file = file_fopenOutput(path);
	free(path);
	return file;
}

/**
 * Execute one parsed instruction against the environment.
 *
 * Dispatches on inst->kind:
 *  - PA_IK_Expr / PA_IK_Table: build a function from an expression or from
 *    truth-table values and register it under the given name.
 *  - PA_IK_Print: print a named function in the requested format, either to
 *    the file named in the instruction or to stdout.
 *  - PA_IK_Point: apply a point operation to a named point set (created on
 *    first use) and print the set.
 *  - PA_IK_EvalEns: evaluate a named function on every point of a named set.
 *  - PA_IK_EvalPoint: evaluate a named function on a single point.
 *
 * Lookup failures and incompatibilities are reported on stderr and the
 * instruction is abandoned; nothing is returned to the caller.
 *
 * @param env   environment holding the named functions and point sets
 * @param inst  instruction to execute
 */
extern void interp_runCommand(Env* env, TPA_Instruction* inst) {
	Function* f;
	Points* points;
	PointItem* pointItem;
	FILE* out = stdout;

	switch(inst->kind) {
		case PA_IK_Expr:
			f = function_createWithFunctionTree(TPAExpr_toFunctionTree(inst->u.expr.expr, env));
			addFunction(env, inst->u.expr.name, f);
			break;

		case PA_IK_Table:
			f = function_createWithTruthTable(TPAExpr_toTruthTable(inst->u.table.vals));
			addFunction(env, inst->u.table.name, f);
			break;

		case PA_IK_Print:
			f = interp_getFunctionByName(env, inst->u.print.name);
			if (f == 0) {
				fprintf(stderr,"Fonction inconnue\n");
				break;
			}

			/* A filename redirects the output; any failure falls back to stdout. */
			if (inst->u.print.filename) {
				out = interp_openQuotedOutput(inst->u.print.filename);
				if (!out) {
					printf("Fallback on stdout print.\n");
					out = stdout;
				}
			}

			switch(inst->u.print.fmt) {
				case PA_PF_expr:
					function_print(f, out);
					break;

				case PA_PF_bdd:
					function_printAsBDD(f, out);
					break;

				case PA_PF_table:
					function_printAsTruthTable(f, out);
					break;

				case PA_PF_disjonctive:
					function_printAsDNF(f, out);
					break;

				case PA_PF_dot:
					function_printAsTree(f, out);
					break;

				case PA_PF_karnaugh:
					function_printAsKarnaugh(f, out);
					break;
			}

			/* Only close streams opened here, never stdout. */
			if (out != stdout)
				fclose(out);
			break;

		case PA_IK_Point:
			/*
			 * inst->u.point carries:
			 *   name - name of the point set
			 *   ope  - operator: '=' (:=), '+' (+=), '-' (-=)
			 *   vals - the values of the point
			 */
			points = interp_getPointsByName(env, inst->u.point.name);
			if (points == 0) {
				/* Unknown set: create it and register it under the point set's own name. */
				points = points_init();
				addPoints(env, inst->u.point.name, points);
			}
			if (interp_pointsOperation(points, inst->u.point.name, inst->u.point.ope, inst->u.point.vals) == 0)
				fprintf(stderr, "La taille de point est incompatible avec l'ensemble\n");
			else
				points_print(points, out);
			break;

		case PA_IK_EvalEns:
			points = interp_getPointsByName(env, inst->u.evalens.ens);
			if (points == 0) {
				fprintf(stderr, "Ensemble des points inconnue\n");
				break;
			}
			pointItem = points->point;
			if (pointItem == 0) {
				fprintf(stderr, "L'ensemble des points est vide\n");
				break;
			}

			f = interp_getFunctionByName(env, inst->u.evalens.name);
			if (f == 0) {
				fprintf(stderr,"Fonction inconnue\n");
				break;
			}

			if (points_getDim(points) != function_getVarsLength(f)) {
				fprintf(stderr,"Dimension des points et de la fonction incompatible.\n");
				break;
			}

			/* pointItem is known to be non-null here, so the list has at least one entry. */
			do {
				function_printEvalPoint(f, pointItem->p, out);
				pointItem = pointItem->next;
			} while (pointItem != 0);
			break;

		case PA_IK_EvalPoint:
			/* Same unknown-function handling as the Print and EvalEns cases. */
			f = interp_getFunctionByName(env, inst->u.evalpoint.name);
			if (f == 0) {
				fprintf(stderr,"Fonction inconnue\n");
				break;
			}
			function_printEvalPoint(f, TPAExpr_toPoint(inst->u.evalpoint.vals), out);
			break;

		default:
			fprintf(stderr," Instruction inconnue\n");
			break;
	}
}