Beispiel #1
0
/*
 * Thread entry point: keep working until the shared counter 'num_left'
 * drops to zero.
 *
 * Each pass first calls PMR_process_r_queue(), then takes at most one
 * task -- from the s-queue if one is available there, otherwise from
 * the c-queue -- processes it and releases the task wrapper.
 *
 * argin  - auxarg3_t bundle, unpacked with retrieve_auxarg3()
 * return - always NULL
 */
static
void *empty_workQ(void *argin)
{
  int        tid;
  proc_t    *procinfo;
  val_t     *Wstruct;
  vec_t     *Zstruct;
  tol_t     *tolstruct;
  workQ_t   *workQ;
  counter_t *num_left;

  retrieve_auxarg3((auxarg3_t*)argin, &tid, &procinfo, &Wstruct,
                   &Zstruct, &tolstruct, &workQ, &num_left);

  int n = Wstruct->n;

  /* per-thread scratch space handed to every task: 4*n doubles ... */
  double *work = malloc(4*n*sizeof(double));
  assert(work != NULL);

  /* ... and 2*n integers */
  int *iwork = malloc(2*n*sizeof(int));
  assert(iwork != NULL);

  while (PMR_get_counter_value(num_left) > 0) {
    /* the r-queue is always served first */
    PMR_process_r_queue(tid, procinfo, Wstruct, Zstruct, tolstruct,
                        workQ, num_left, work, iwork);

    task_t *next = PMR_remove_task_at_front(workQ->s_queue);
    if (next != NULL) {
      assert(next->flag == SINGLETON_TASK_FLAG);

      PMR_process_s_task((singleton_t*)next->data, tid, procinfo,
                         Wstruct, Zstruct, tolstruct, num_left,
                         work, iwork);
    } else {
      /* no singleton task available: fall back to the c-queue */
      next = PMR_remove_task_at_front(workQ->c_queue);
      if (next == NULL)
        continue;   /* nothing to do right now; re-check the counter */

      assert(next->flag == CLUSTER_TASK_FLAG);

      PMR_process_c_task((cluster_t*)next->data, tid, procinfo,
                         Wstruct, Zstruct, tolstruct, workQ, num_left,
                         work, iwork);
    }

    /* only the task wrapper is released here */
    free(next);
  }

  free(work);
  free(iwork);

  return NULL;
}
Beispiel #2
0
/*
 * Split the eigenvalue range cl->begin..cl->end of cluster 'cl' into
 * child tasks and enqueue the ones this process takes part in.
 *
 * The range is cut after every index i with
 *   Wgap[i] >= MIN_RELGAP*fabs(Wshifted[i])
 * and after cl->end.  A piece of length one is a singleton; consecutive
 * singletons are collected into one singleton task until the run ends
 * or it reaches 'max_size' entries.  A longer piece becomes a cluster
 * task.
 *
 *  - Singleton tasks are created only if some iproc[k] in the run equals
 *    this process id; they share 'RRR' (its dependency count is
 *    incremented per task) and go to workQ->s_queue.
 *  - Cluster tasks are created only if pid lies within the range of
 *    process ids found in iproc[] for the piece.  Each gets its own
 *    copy of the parent representation (RRR->D, RRR->L): in freshly
 *    allocated arrays when my_first == my_last, otherwise in two
 *    columns of Z.  The task goes to workQ->r_queue if more than one
 *    process is involved, else to workQ->c_queue.
 *
 * Before returning, RRR is flagged as processed, PMR_try_destroy_rrr()
 * is called on it, and 'cl' is freed -- the caller must not use 'cl'
 * afterwards.
 *
 * 'tid' is not used by this routine.  Always returns 0.
 */
/* TODO: Refactor this routine */
static inline
int create_subtasks
(cluster_t *cl, int tid, proc_t *procinfo,
 rrr_t *RRR, val_t *Wstruct, vec_t *Zstruct,
 workQ_t *workQ, counter_t *num_left)
{
  /* From inputs */
  int              cl_begin  = cl->begin;
  int              cl_end    = cl->end;
  int              depth     = cl->depth;
  int              bl_begin  = cl->bl_begin;
  int              bl_end    = cl->bl_end;
  int              bl_size   = bl_end - bl_begin + 1;
  double           bl_spdiam = cl->bl_spdiam;
  double           lgap;

  int  pid       = procinfo->pid;
  int  nproc     = procinfo->nproc;
  int  nthreads  = procinfo->nthreads;
  bool proc_involved = true;

  double *restrict Wgap      = Wstruct->Wgap;
  double *restrict Wshifted  = Wstruct->Wshifted;
  int    *restrict iproc     = Wstruct->iproc;

  int              ldz       = Zstruct->ldz;
  double *restrict Z         = Zstruct->Z;
  int    *restrict Zindex    = Zstruct->Zindex;

  /* others */
  int    i, l, k;
  int    max_size;
  task_t *task;
  bool   task_inserted;
  int    new_first, new_last, new_size, new_ftt1, new_ftt2;
  /* always (re)set by the first singleton of a run before being read;
   * initialized only so they never hold an indeterminate value */
  int    sn_first = cl_begin, sn_last = cl_begin, sn_size = 0;
  rrr_t  *RRR_parent;
  int    new_lpid, new_rpid;
  double *restrict D_parent;
  double *restrict L_parent;
  int    my_first, my_last;

  /* upper bound on the number of singletons packed into one task;
   * the quotient is evaluated in double (fmin returns double) and
   * truncated on assignment */
  max_size = fmax(1, PMR_get_counter_value(num_left) /
                     (fmin(depth+1,4)*nthreads) );
  task_inserted = true;
  new_first = cl_begin;
  for (i=cl_begin; i<=cl_end; i++) {

    /* a piece ends at cl_end or wherever the relative gap is large
     * enough; otherwise keep extending the current piece */
    if ( i == cl_end )
      new_last = i;
    else if ( Wgap[i] >= MIN_RELGAP*fabs(Wshifted[i]) )
      new_last = i;
    else
      continue;

    new_size = new_last - new_first + 1;

    if (new_size == 1) {
      /* singleton was found */

      if (new_first==cl_begin || task_inserted==true) {
        /* initialize new singleton task */
        sn_first = new_first;
        sn_last  = new_first;
        sn_size  = 1;
      } else {
        /* extend singleton task by one */
        sn_last++;
        sn_size++;
      }

      /* insert task if the run ends here: last index of the cluster,
       * size limit reached, or the next index does not end a piece.
       * (i+1 is only inspected when i != cl_end.) */
      if (i==cl_end || sn_size>=max_size ||
          Wgap[i+1] < MIN_RELGAP*fabs(Wshifted[i+1])) {

        /* Check if process involved in s-task */
        proc_involved = false;
        for (k=sn_first; k<=sn_last; k++) {
          if (iproc[k] == pid) {
            proc_involved = true;
            break;
          }
        }
        if (proc_involved == false) {
          /* run is dropped; the next singleton starts a new one */
          task_inserted = true;
          new_first = i + 1;
          continue;
        }

        /* Insert task as process is involved */
        if (sn_first == cl_begin) {
          lgap = cl->lgap;
        } else {
          lgap = Wgap[sn_first-1];
        }

        PMR_increment_rrr_dependencies(RRR);

        task = PMR_create_s_task(sn_first, sn_last, depth+1, bl_begin,
                                 bl_end, bl_spdiam, lgap, RRR);

        PMR_insert_task_at_back(workQ->s_queue, task);

        task_inserted = true;
      } else {
        task_inserted = false;
      }

    } else {
      /* cluster was found */

      /* check if process involved in processing the new cluster:
       * determine smallest/largest process id among entries with
       * iproc[l] != -1 */
      new_lpid = nproc-1;
      new_rpid = -1;
      for (l=new_first; l<=new_last; l++) {
        if (iproc[l] != -1) {
          new_lpid = imin(new_lpid, iproc[l]);
          new_rpid = imax(new_rpid, iproc[l]);
        }
      }
      if (new_lpid > pid || new_rpid < pid) {
        task_inserted = true;
        new_first = i + 1;
        continue;
      }

      /* find gap to the left */
      if (new_first == cl_begin) {
        lgap = cl->lgap;
      } else {
        lgap = Wgap[new_first - 1];
      }

      /* determine where to store the parent rrr needed by the
       * cluster to find its new rrr */
      my_first = imax(new_first, cl->proc_W_begin);
      my_last  = imin(new_last,  cl->proc_W_end);

      if ( my_first == my_last ) {
        /* only one eigenvalue of cluster belongs to process:
         * copy parent RRR into allocated arrays and mark them
         * for freeing later */
        D_parent = malloc(bl_size * sizeof(double));
        assert(D_parent != NULL);

        L_parent = malloc(bl_size * sizeof(double));
        assert(L_parent != NULL);

        memcpy(D_parent, RRR->D, bl_size*sizeof(double));
        memcpy(L_parent, RRR->L, bl_size*sizeof(double));

        RRR_parent = PMR_create_rrr(D_parent, L_parent, NULL,
                                    NULL, bl_size, depth);
        PMR_set_copied_parent_rrr_flag(RRR_parent, true);

      } else {
        /* copy parent RRR into Z to make cluster task independent.
         * The two column indices are looked up only on this path:
         * they are not needed above, and Zindex[my_first + 1] is
         * then never touched for a single-eigenvalue piece. */
        new_ftt1 = Zindex[my_first    ];
        new_ftt2 = Zindex[my_first + 1];

        memcpy(&Z[new_ftt1*ldz+bl_begin], RRR->D,
               bl_size*sizeof(double));
        memcpy(&Z[new_ftt2*ldz+bl_begin], RRR->L,
               bl_size*sizeof(double));

        RRR_parent = PMR_create_rrr(&Z[new_ftt1*ldz + bl_begin],
                                    &Z[new_ftt2*ldz + bl_begin],
                                    NULL, NULL, bl_size, depth);
      }

      /* Create the task for the cluster and put it in the queue */
      task = PMR_create_c_task(new_first, new_last, depth+1,
                               bl_begin, bl_end, bl_spdiam, lgap,
                               cl->proc_W_begin, cl->proc_W_end,
                               new_lpid, new_rpid, RRR_parent);

      /* clusters spanning several processes go to the r-queue */
      if (new_lpid != new_rpid)
        PMR_insert_task_at_back(workQ->r_queue, task);
      else
        PMR_insert_task_at_back(workQ->c_queue, task);

      task_inserted = true;

    } /* if singleton or cluster found */

    new_first = i + 1;
  } /* end i */

  /* set flag in RRR that last singleton is created */
  PMR_set_parent_processed_flag(RRR);

  /* clean up */
  PMR_try_destroy_rrr(RRR);
  free(cl);

  return 0;
} /* end create_subtasks */
Beispiel #3
0
/*
 * Refine the eigenvalues rf_begin..rf_end of cluster 'cl' with respect
 * to the representation held in 'RRR', then store the refined values,
 * shifted by sigma = L[bl_size-1], into W.
 *
 * If the range is larger than this thread's own share ('own_part'),
 * the leading part is cut into 'num_tasks' refinement tasks that are
 * appended to workQ->r_queue; this thread refines the remaining tail
 * itself, makes one pass over the r-queue, and then waits on the
 * refinement semaphore once per created task.  Otherwise the whole
 * range is refined by a single odrrb call.
 *
 * Wshifted, Wgap and Werr are handed to odrrb (presumably updated in
 * place -- confirm against the odrrb interface); Wgap is additionally
 * rewritten at the split points.  'work' and 'iwork' are scratch
 * buffers forwarded to odrrb and PMR_process_r_task.
 *
 * Always returns 0.
 */
static inline 
int refine_eigvals
(cluster_t *cl, int rf_begin, int rf_end,
 int tid, proc_t *procinfo, rrr_t *RRR, 
 val_t *Wstruct, vec_t *Zstruct,
 tol_t *tolstruct, counter_t *num_left,
 workQ_t *workQ, double *work, int *iwork)
{
  int    rf_size   = rf_end-rf_begin+1;
  int    bl_begin  = cl->bl_begin;
  int    bl_end    = cl->bl_end;
  int    bl_size   = bl_end - bl_begin + 1;
  double bl_spdiam = cl->bl_spdiam;

  double *restrict D   = RRR->D;
  double *restrict L   = RRR->L;
  double *restrict DLL = RRR->DLL;

  double *restrict W         = Wstruct->W;
  double *restrict Werr      = Wstruct->Werr;
  double *restrict Wgap      = Wstruct->Wgap;
  int    *restrict Windex    = Wstruct->Windex;
  double *restrict Wshifted  = Wstruct->Wshifted;

  double pivmin = tolstruct->pivmin;
  double rtol1 = tolstruct->rtol1;
  double rtol2 = tolstruct->rtol2;

  /* Determine if refinement should be split into tasks */
  int left = PMR_get_counter_value(num_left);
  int nz = Zstruct->nz;
  int nthreads = procinfo->nthreads;
  /* lower bound for the part kept by this thread; never below 2 */
  int MIN_REFINE_CHUNK = fmax(2,nz/(4*nthreads));
  /* this thread's share: ceil(left/nthreads), at least MIN_REFINE_CHUNK */
  int own_part = (int)fmax(ceil((double)left/nthreads),MIN_REFINE_CHUNK);

  int offset, i, p, q;
  double savegap;   /* set and read only on the p == q path below */
  task_t *task;
  if (own_part < rf_size) {
    int others_part = rf_size - own_part;
    /* presumably >= 1 here, assuming iceil() is integer ceiling division */
    int num_tasks   = iceil(rf_size, own_part) - 1;
    int chunk       = others_part/num_tasks;        /* floor */

    /* ts_begin/ts_end index W-related arrays; p/q are the matching
     * indices taken from Windex and advanced in lock-step */
    int ts_begin=rf_begin, ts_end;
    p = Windex[rf_begin];
    for (i=0; i<num_tasks; i++) {
      ts_end = ts_begin + chunk - 1;
      q      = p        + chunk - 1;

      task = 
        PMR_create_r_task
        (ts_begin, ts_end, D, DLL, p, q, bl_size, bl_spdiam, tid);
     
      /* an empty task is not queued; it is signalled as done right
       * away so the wait loop below still sees num_tasks posts */
      if (ts_begin <= ts_end)
	PMR_insert_task_at_back(workQ->r_queue, task);
      else
        PMR_refine_sem_post(task->data); /* case chunk=0 */

      ts_begin = ts_end + 1;
      p        = q      + 1;
    }
    /* whatever was not handed out is refined by this thread */
    ts_end = rf_end;
    q      = Windex[rf_end];
    offset = Windex[ts_begin] - 1;

    /* Call bisection routine to refine the values */
    if (ts_begin <= ts_end) {
      int info;
      odrrb
      (&bl_size, D, DLL, &p, &q, &rtol1, &rtol2, &offset, 
       &Wshifted[ts_begin], &Wgap[ts_begin], &Werr[ts_begin],
       work, iwork, &pivmin, &bl_spdiam, &bl_size, &info);
      assert( info == 0 );
    }

    /* Empty "all" r-queue refine tasks before waiting: one pass over
     * the tasks counted up front; refine tasks are processed, any
     * other task is put back at the end of the queue */
    int num_iter = PMR_get_num_tasks(workQ->r_queue);
    for (i=0; i<num_iter; i++) {
      task = PMR_remove_task_at_front(workQ->r_queue);
      if (task != NULL) {
	if (task->flag == REFINE_TASK_FLAG) {
	  PMR_process_r_task
          ((refine_t*)task->data, procinfo, Wstruct, tolstruct, work, iwork);
	  free(task);
	} else {
	  PMR_insert_task_at_back(workQ->r_queue, task);
	}
      } /* if task */
    } /* end for i */
    
    /* Barrier: wait until all created tasks finished */
    /* NOTE(review): 'task' is whatever the loop above left behind -- it
     * may be NULL (remove returned nothing), a wrapper already passed
     * to free(), or a re-queued non-refine task; if the loop did not
     * run it is the last created task, which is in the shared queue.
     * Reading task->data here looks unsafe unless PMR_refine_sem_wait /
     * PMR_refine_sem_destroy do not depend on it -- confirm against
     * their definitions. */
    int count = num_tasks;
    while (count > 0) {
      /* retry while the wait reports failure (non-zero) */
      while ( PMR_refine_sem_wait(task->data) != 0 ) { };
      count--;
    }
    PMR_refine_sem_destroy(task->data);

    /* Edit right gap at splitting point: recompute the gap between
     * the last value of each chunk and its right neighbour from the
     * refined values and their error bounds, clamped at zero */
    ts_begin = rf_begin;
    for (i=0; i<num_tasks; i++) {
      ts_end = ts_begin + chunk - 1;
      
      Wgap[ts_end] = fmax(0.0, Wshifted[ts_end + 1] - Werr[ts_end + 1]
			  - Wshifted[ts_end] - Werr[ts_end]);
      
      ts_begin = ts_end + 1;
    }
  } else {
    /* Refinement of cluster without creating tasks */
    
    /* 'p' and 'q' are local (within block) indices of
     * the first/last eigenvalue of the cluster */
    p = Windex[rf_begin];
    q = Windex[rf_end];
    
    offset = Windex[rf_begin] - 1;    /* = p - 1 */
    
    /* single value: pass a zero gap to odrrb and restore the original
     * gap afterwards */
    if (p == q) {
      savegap = Wgap[rf_begin];
      Wgap[rf_begin] = 0.0;
    }  
    
    /* Bisection routine to refine the values */
    int info;
    odrrb
    (&bl_size, D, DLL, &p, &q, &rtol1, &rtol2, &offset, 
     &Wshifted[rf_begin], &Wgap[rf_begin], &Werr[rf_begin],
     work, iwork, &pivmin, &bl_spdiam, &bl_size, &info);
    assert( info == 0 );
    
    if (p == q)
      Wgap[rf_begin] = savegap;
  } /* end refine with or without creating tasks */

  /* refined eigenvalues with all shifts applied in W; the shift is
   * read from the last entry of L */
  double sigma = L[bl_size-1];
  for (i=rf_begin; i<=rf_end; i++)
    W[i] = Wshifted[i] + sigma;

  return 0;
} /* end refine_eigvals */
Beispiel #4
0
/*
 * Thread entry point: take tasks off the work queue and process them
 * until the shared counter 'num_left' drops to zero.
 *
 * On every pass the queues are tried in fixed order -- r-queue, then
 * s-queue, then c-queue -- and at most one task is processed.
 *
 * argin  - aux3_t bundle, unpacked with retrieve_aux3()
 * return - always NULL
 */
static void *empty_workQ(void *argin)
{
  /* unpacked from argin */
  int        tid;
  int        nthreads;
  counter_t *num_left;
  workQ_t   *workQ;
  in_t      *Dstruct;
  val_t     *Wstruct;
  vec_t     *Zstruct;
  tol_t     *tolstruct;

  /* locals */
  int        n;
  double    *work;
  int       *iwork;
  task_t    *job;

  retrieve_aux3((aux3_t *) argin, &tid, &nthreads, &num_left,
                &workQ, &Dstruct, &Wstruct, &Zstruct, &tolstruct);

  n = Wstruct->n;

  /* per-thread scratch space handed to every task: 4*n doubles ... */
  work = malloc( 4*n * sizeof(double) );
  assert(work != NULL);

  /* ... and 2*n integers */
  iwork = malloc( 2*n * sizeof(int) );
  assert(iwork != NULL);

  while (PMR_get_counter_value(num_left) > 0) {

    if ((job = PMR_remove_task_at_front(workQ->r_queue)) != NULL) {
      assert(job->flag == REFINEMENT_TASK_FLAG);

      PMR_process_r_task((refine_t *) job->data, tid, Wstruct,
                         tolstruct, work, iwork);
    } else if ((job = PMR_remove_task_at_front(workQ->s_queue)) != NULL) {
      assert(job->flag == SINGLETON_TASK_FLAG);

      PMR_process_s_task((singleton_t *) job->data, tid, num_left,
                         workQ, Wstruct, Zstruct, tolstruct, work,
                         iwork);
    } else if ((job = PMR_remove_task_at_front(workQ->c_queue)) != NULL) {
      assert(job->flag == CLUSTER_TASK_FLAG);

      PMR_process_c_task((cluster_t *) job->data, tid, nthreads,
                         num_left, workQ, Wstruct, Zstruct, tolstruct,
                         work, iwork);
    } else {
      /* all three queues were empty; re-check the counter */
      continue;
    }

    /* only the task wrapper is released here */
    free(job);
  }

  free(work);
  free(iwork);

  return NULL;
}