Exemplo n.º 1
0
int opal_crs_blcr_enable_checkpoint(void)
{
    opal_output_verbose(10, mca_crs_blcr_component.super.output_handle,
                        "crs:blcr: enable_checkpoint()");
    /*
     * Leave the BLCR Critical Section
     */
    cr_leave_cs(client_id);

    return OPAL_SUCCESS;
}
Exemplo n.º 2
0
static void
on_child_exit(int signum, siginfo_t *siginfo, void *arg)
{
	int status;

	/*
  	 * if srun_cr is checkpoint/restart-ed after srun exited,
  	 * srun_pid will be the pid of the new srun.
	 */
	cr_enter_cs(cr_id);
	if (waitpid(srun_pid, &status, WNOHANG) == srun_pid) {
		verbose("srun(%d) exited, status: %d", srun_pid, status);
		mimic_exit(status);
	}
	kill(srun_pid, SIGKILL);
	cr_leave_cs(cr_id);
}
Exemplo n.º 3
0
int
main(int argc, char **argv)
{
	int debug_level, sig, srun_fd;
	struct sigaction sa;
	log_options_t logopt = LOG_OPTS_STDERR_ONLY;
	struct sockaddr_un ca;
	unsigned int ca_len = sizeof(ca);

	atexit(remove_listen_socket);

	/* copied from srun */
	debug_level = _slurm_debug_env_val();
	logopt.stderr_level += debug_level;
	log_init(xbasename(argv[0]), logopt, 0, NULL);

	if (init_srun_argv(argc, argv)) {
		fatal("failed to initialize arguments for running srun");
	}

	if ((cr_id = cr_init()) < 0) {
		fatal("failed to initialize libcr: %s", cr_strerror(errno));
	}
	(void)cr_register_callback(cr_callback, NULL, CR_THREAD_CONTEXT);

	/* forward signals. copied from cr_restart */
	sa.sa_sigaction = signal_child;
	sa.sa_flags = SA_RESTART | SA_NODEFER | SA_SIGINFO;
	sigemptyset(&sa.sa_mask);
	for (sig = 0;  sig < _NSIG; sig ++) {
		if (sig == SIGSTOP ||
		    sig == SIGKILL ||
		    sig == SIGCHLD)
			continue;
		sigaction(sig, &sa, NULL);
	}
	sa.sa_sigaction = on_child_exit;
	sa.sa_flags = SA_RESTART | SA_SIGINFO | SA_NOCLDSTOP;
	sigaction(SIGCHLD, &sa, NULL);

	cr_enter_cs(cr_id); /* BEGIN CS: avoid race condition of whether srun is forked */
	if ( fork_exec_srun() ) {
		fatal("failed fork/exec/wait srun");
	}
	cr_leave_cs(cr_id); /* END CS */

	while (1) {
		pthread_mutex_lock(&step_launch_mutex);
		while (step_launched) {
			/* just avoid busy waiting */
			pthread_cond_wait(&step_launch_cond,
					  &step_launch_mutex);
		}
		pthread_mutex_unlock(&step_launch_mutex);

		if (_wait_for_srun_connect() < 0)
			continue;

		cr_enter_cs(cr_id); /* BEGIN CS: checkpoint(callback) will be delayed */

		srun_fd = accept(listen_fd, (struct sockaddr*)&ca, &ca_len);
		if (srun_fd < 0) {
			/* restarted before enter CS. socket will not be restored */
			if (errno == EBADF) {
				cr_leave_cs(cr_id);
				continue;
			} else {
				fatal("failed to accept socket: %m");
			}
		}

		_read_info_from_srun(srun_fd);
		close(srun_fd);

		step_launched = 1;
		debug2("step launched");

		cr_leave_cs(cr_id); /* END CS */
	}

	return 0;
}
int
hypre_SMGResidual( void               *residual_vdata,
                   hypre_StructMatrix *A,
                   hypre_StructVector *x,
                   hypre_StructVector *b,
                   hypre_StructVector *r              )
{
   int ierr = 0;

   hypre_SMGResidualData *residual_data = (hypre_SMGResidualData *)residual_vdata;

   hypre_IndexRef          base_stride = (residual_data -> base_stride);
   hypre_BoxArray         *base_points = (residual_data -> base_points);
   hypre_ComputePkg       *compute_pkg = (residual_data -> compute_pkg);

   hypre_CommHandle       *comm_handle;
                       
   hypre_BoxArrayArray    *compute_box_aa;
   hypre_BoxArray         *compute_box_a;
   hypre_Box              *compute_box;
                       
   hypre_Box              *A_data_box;
   hypre_Box              *x_data_box;
   hypre_Box              *b_data_box;
   hypre_Box              *r_data_box;
                       
   int                     Ai;
   int                     xi;
   int                     bi;
   int                     ri;
                         
   double                 *Ap;
   double                 *xp;
   double                 *bp;
   double                 *rp;
                       
   hypre_Index             loop_size;
   hypre_IndexRef          start;
                       
   hypre_StructStencil    *stencil;
   hypre_Index            *stencil_shape;
   int                     stencil_size;

   int                     compute_i, i, j, si;
   int                     loopi, loopj, loopk;

   hypre_BeginTiming(residual_data -> time_index);

   /*-----------------------------------------------------------------------
    * Compute residual r = b - Ax
    *-----------------------------------------------------------------------*/

   stencil       = hypre_StructMatrixStencil(A);
   stencil_shape = hypre_StructStencilShape(stencil);
   stencil_size  = hypre_StructStencilSize(stencil);

   for (compute_i = 0; compute_i < 2; compute_i++)
   {
      switch(compute_i)
      {
         case 0:
         {
            xp = hypre_StructVectorData(x);
            hypre_InitializeIndtComputations(compute_pkg, xp, &comm_handle);
            compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);

            /*----------------------------------------
             * Copy b into r
             *----------------------------------------*/

            compute_box_a = base_points;
            hypre_ForBoxI(i, compute_box_a)
               {
                  compute_box = hypre_BoxArrayBox(compute_box_a, i);
                  start = hypre_BoxIMin(compute_box);

                  b_data_box =
                     hypre_BoxArrayBox(hypre_StructVectorDataSpace(b), i);
                  r_data_box =
                     hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), i);

                  bp = hypre_StructVectorBoxData(b, i);
                  rp = hypre_StructVectorBoxData(r, i);

                  hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
                  hypre_BoxLoop2Begin(loop_size,
                                      b_data_box, start, base_stride, bi,
                                      r_data_box, start, base_stride, ri);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,bi,ri
#include "hypre_box_smp_forloop.h"
                  hypre_BoxLoop2For(loopi, loopj, loopk, bi, ri)
                     {
                        rp[ri] = bp[bi];
                     }
                  hypre_BoxLoop2End(bi, ri);
               }
         }
         break;

         case 1:
         {
            hypre_FinalizeIndtComputations(comm_handle);
            compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
         }
         break;
      }

      /*--------------------------------------------------------------------
       * Compute r -= A*x
       *--------------------------------------------------------------------*/

      hypre_ForBoxArrayI(i, compute_box_aa)
         {
            compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i);

            A_data_box = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i);
            x_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);
            r_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), i);

            rp = hypre_StructVectorBoxData(r, i);

            hypre_ForBoxI(j, compute_box_a)
               {
                  compute_box = hypre_BoxArrayBox(compute_box_a, j);

                  start  = hypre_BoxIMin(compute_box);

                  for (si = 0; si < stencil_size; si++)
                  {
                     Ap = hypre_StructMatrixBoxData(A, i, si);
                     xp = hypre_StructVectorBoxData(x, i) +
                        hypre_BoxOffsetDistance(x_data_box, stencil_shape[si]);

                     hypre_BoxGetStrideSize(compute_box, base_stride,
                                            loop_size);
                     hypre_BoxLoop3Begin(loop_size,
                                         A_data_box, start, base_stride, Ai,
                                         x_data_box, start, base_stride, xi,
                                         r_data_box, start, base_stride, ri);
#if 0                     
/*  The following portion is preprocessed to be handled by ROSE outliner */
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,xi,ri
#include "hypre_box_smp_forloop.h"
                     hypre_BoxLoop3For(loopi, loopj, loopk, Ai, xi, ri)
                        {
                           rp[ri] -= Ap[Ai] * xp[xi];
                        }
                     hypre_BoxLoop3End(Ai, xi, ri);
#else
                    for (hypre__block = 0; hypre__block < hypre__num_blocks;
                         hypre__block++)
                      {
                        loopi = 0;
                        loopj = 0;
                        loopk = 0;
                        hypre__nx = hypre__mx;
                        hypre__ny = hypre__my;
                        hypre__nz = hypre__mz;
                        if (hypre__num_blocks > 1)
                          {
                            if (hypre__dir == 0)
                              {
                                loopi =
                                  hypre__block * hypre__div +
                                  (((hypre__mod) <
                                    (hypre__block)) ? (hypre__mod)
                                   : (hypre__block));
                                hypre__nx =
                                  hypre__div +
                                  ((hypre__mod > hypre__block) ? 1 : 0);
                              }
                            else if (hypre__dir == 1)
                              {
                                loopj =
                                  hypre__block * hypre__div +
                                  (((hypre__mod) <
                                    (hypre__block)) ? (hypre__mod)
                                   : (hypre__block));
                                hypre__ny =
                                  hypre__div +
                                  ((hypre__mod > hypre__block) ? 1 : 0);
                              }
                            else if (hypre__dir == 2)
                              {
                                loopk =
                                  hypre__block * hypre__div +
                                  (((hypre__mod) <
                                    (hypre__block)) ? (hypre__mod)
                                   : (hypre__block));
                                hypre__nz =
                                  hypre__div +
                                  ((hypre__mod > hypre__block) ? 1 : 0);
                              }
                          };
                        Ai =
                          hypre__i1start + loopi * hypre__sx1 +
                          loopj * hypre__sy1 + loopk * hypre__sz1;
                        xi =
                          hypre__i2start + loopi * hypre__sx2 +
                          loopj * hypre__sy2 + loopk * hypre__sz2;
                        ri =
                          hypre__i3start + loopi * hypre__sx3 +
                          loopj * hypre__sy3 + loopk * hypre__sz3;

                          //begin of the loop
#if 0                          //
                        for (loopk = 0; loopk < hypre__nz; loopk++)
                          {
                            for (loopj = 0; loopj < hypre__ny; loopj++)
                              {
                                for (loopi = 0; loopi < hypre__nx; loopi++)
                                  {
                                    {
                                      rp[ri] -= Ap[Ai] * xp[xi];
                                    }
                                    Ai += hypre__sx1;
                                    xi += hypre__sx2;
                                    ri += hypre__sx3;
                                  }
                                Ai += hypre__sy1 - hypre__nx * hypre__sx1;
                                xi += hypre__sy2 - hypre__nx * hypre__sx2;
                                ri += hypre__sy3 - hypre__nx * hypre__sx3;
                              }
                            Ai += hypre__sz1 - hypre__ny * hypre__sy1;
                            xi += hypre__sz2 - hypre__ny * hypre__sy2;
                            ri += hypre__sz3 - hypre__ny * hypre__sy3;
                          } // end of the loop
#else

#if BLCR_CHECKPOINTING
                        // Only checkpoint it at the first occurrance.
                        if (g_checkpoint_flag == 0)
                        {
                          int err;
                          cr_checkpoint_args_t cr_args;  
                          cr_checkpoint_handle_t cr_handle;
                          cr_initialize_checkpoint_args_t(&cr_args);
                          cr_args.cr_scope = CR_SCOPE_PROC;// a process
                          cr_args.cr_target = 0; //self
                          cr_args.cr_signal = SIGKILL; // kill after checkpointing
                          cr_args.cr_fd = open("dump.yy", O_WRONLY|O_CREAT|O_LARGEFILE, 0400);
                          if (cr_args.cr_fd < 0) {
                              printf("Error: cannot open file for checkpoiting context\n");
                              abort();
                          }

                          g_checkpoint_flag ++;
                          printf("Checkpoiting: starting here ..\n");

                          err = cr_request_checkpoint(&cr_args, &cr_handle);
                          if (err < 0) {
                            printf("cannot request checkpoining! err=%d\n",err);
                            abort();
                          }
                          // block until the request is served
                          cr_enter_cs(cr);
                          cr_leave_cs(cr);

                          printf("Checkpoiting: restarting here ..\n");
                        }
#endif

                           OUT__1__6755__(&Ai,&xi,&ri,&Ap,&xp,&rp,&loopi,&loopj,&loopk,&hypre__sx1,&hypre__sy1,&hypre__sz1,&hypre__sx2,&hypre__sy2,&hypre__sz2,&hypre__sx3,&hypre__sy3,&hypre__sz3,&hypre__nx,&hypre__ny,&hypre__nz);

#endif
                      }
                  };