示例#1
0
/*
 * Executes all tasks which are in the r-queue at the moment of the 
 * call. This routine is called to make sure that all tasks in the 
 * queue are dequeued before continueing with other tasks.
 */
void PMR_process_r_queue(int tid, proc_t *procinfo, val_t *Wstruct, 
			 vec_t *Zstruct, tol_t *tolstruct, 
			 workQ_t *workQ, counter_t *num_left, 
			 double *work, int *iwork)
{
  int        thread_support = procinfo->thread_support;
  int        t, num_tasks;
  int        status;
  task_t     *task;

  num_tasks = PMR_get_num_tasks(workQ->r_queue);

  for (t=0; t<num_tasks; t++) {
    
    task = PMR_remove_task_at_front(workQ->r_queue);

    if ( task != NULL ) {
    
      if (task->flag == CLUSTER_TASK_FLAG) {

	if (thread_support != MPI_THREAD_FUNNELED || tid == 0) {
	  /* if MPI_THREAD_FUNNELED only tid==0 should process 
           * these tasks, otherwise any thread can do it */
	  status = PMR_process_c_task((cluster_t *) task->data,
				      tid, procinfo, Wstruct,
				      Zstruct, tolstruct, workQ,
				      num_left, work, iwork);
	  
	  if (status == C_TASK_PROCESSED) {
	    free(task);
	  } else {
	    PMR_insert_task_at_back(workQ->r_queue, task);
	  }
	} else {
	    PMR_insert_task_at_back(workQ->r_queue, task);
	}

      } /* end if cluster task */

      if (task->flag == REFINE_TASK_FLAG) {
	PMR_process_r_task((refine_t *) task->data, procinfo,
			   Wstruct, tolstruct, work, iwork);
	free(task);
      }
 
    } /* end if task removed */
  } /* end for t */
} /* end process_entire_r_queue */
示例#2
0
/* TODO: Refactor this routine */
static inline 
int create_subtasks
(cluster_t *cl, int tid, proc_t *procinfo, 
 rrr_t *RRR, val_t *Wstruct, vec_t *Zstruct,
 workQ_t *workQ, counter_t *num_left)
{
  /* From inputs */
  int              cl_begin  = cl->begin;
  int              cl_end    = cl->end;
  int              depth     = cl->depth;
  int              bl_begin  = cl->bl_begin;
  int              bl_end    = cl->bl_end;
  int              bl_size   = bl_end - bl_begin + 1;
  double           bl_spdiam = cl->bl_spdiam;
  double           lgap;

  int  pid       = procinfo->pid;
  int  nproc     = procinfo->nproc;
  int  nthreads  = procinfo->nthreads;
  bool proc_involved=true;

  double *restrict Wgap      = Wstruct->Wgap;
  double *restrict Wshifted  = Wstruct->Wshifted;
  int    *restrict iproc     = Wstruct->iproc;

  int              ldz       = Zstruct->ldz;
  double *restrict Z         = Zstruct->Z;
  int    *restrict Zindex    = Zstruct->Zindex;

  /* others */
  int    i, l, k;
  int    max_size;
  task_t *task;
  bool   task_inserted;
  int    new_first, new_last, new_size, new_ftt1, new_ftt2;
  int    sn_first, sn_last, sn_size;
  rrr_t  *RRR_parent;
  int    new_lpid, new_rpid;
  double *restrict D_parent;
  double *restrict L_parent;
  int    my_first, my_last;
  bool   copy_parent_rrr;


  max_size = fmax(1, PMR_get_counter_value(num_left) /
		     (fmin(depth+1,4)*nthreads) );
  task_inserted = true;
  new_first = cl_begin;
  for (i=cl_begin; i<=cl_end; i++) {    

    if ( i == cl_end )
      new_last = i;
    else if ( Wgap[i] >= MIN_RELGAP*fabs(Wshifted[i]) )
      new_last = i;
    else
      continue;

    new_size = new_last - new_first + 1;

    if (new_size == 1) {
      /* singleton was found */
      
      if (new_first==cl_begin || task_inserted==true) {
	/* initialize new singleton task */
	sn_first = new_first;
	sn_last  = new_first;
	sn_size  = 1;
      } else {
	/* extend singleton task by one */
	sn_last++;
	sn_size++;
      }
      
      /* insert task if ... */
      if (i==cl_end || sn_size>=max_size ||
	    Wgap[i+1] < MIN_RELGAP*fabs(Wshifted[i+1])) {

	/* Check if process involved in s-task */
	proc_involved = false;
	for (k=sn_first; k<=sn_last; k++) {
	  if (iproc[k] == pid) {
	    proc_involved = true;
	    break;
	  }
	}
	if (proc_involved == false) {
	  task_inserted = true;
	  new_first = i + 1;
	  continue;
	}

	/* Insert task as process is involved */
	if (sn_first == cl_begin) {
	  lgap = cl->lgap;
	} else {
	  lgap = Wgap[sn_first-1];
	}
	
	PMR_increment_rrr_dependencies(RRR);
	
	task = PMR_create_s_task(sn_first, sn_last, depth+1, bl_begin,
				 bl_end, bl_spdiam, lgap, RRR);
	
	PMR_insert_task_at_back(workQ->s_queue, task);
	  
	task_inserted = true;
      } else {
	task_inserted = false;
      }
      
    } else {
      /* cluster was found */

      /* check if process involved in processing the new cluster */
      new_lpid = nproc-1;
      new_rpid = -1;
      for (l=new_first; l<=new_last; l++) {
	if (iproc[l] != -1) {
	  new_lpid = imin(new_lpid, iproc[l]);
	  new_rpid = imax(new_rpid, iproc[l]);
	  }
      }
      if (new_lpid > pid || new_rpid < pid) {
	task_inserted = true;
	new_first = i + 1;
	continue;
      }

      /* find gap to the left */
      if (new_first == cl_begin) {
	lgap = cl->lgap;
      } else {
	lgap = Wgap[new_first - 1];
      }
    
      /* determine where to store the parent rrr needed by the
       * cluster to find its new rrr */
      my_first = imax(new_first, cl->proc_W_begin);
      my_last  = imin(new_last,  cl->proc_W_end);
      if ( my_first == my_last ) {
	/* only one eigenvalue of cluster belongs to process */
	copy_parent_rrr = true;
      } else {
	/* store parent rrr in Z at column new_ftt */
	copy_parent_rrr = false;
      }
      new_ftt1 = Zindex[my_first    ];
      new_ftt2 = Zindex[my_first + 1];

      if (copy_parent_rrr == true) {
	/* Copy parent RRR into alloceted arrays and mark them
	 * for freeing later */
	D_parent = (double *) malloc(bl_size * sizeof(double));
	assert(D_parent != NULL);
	
	L_parent = (double *) malloc(bl_size * sizeof(double));
	assert(L_parent != NULL);

	memcpy(D_parent, RRR->D, bl_size*sizeof(double));
	memcpy(L_parent, RRR->L, bl_size*sizeof(double));

	RRR_parent = PMR_create_rrr(D_parent, L_parent, NULL, 
				    NULL, bl_size, depth);
	PMR_set_copied_parent_rrr_flag(RRR_parent, true);

      } else {
	/* copy parent RRR into Z to make cluster task independent */
	memcpy(&Z[new_ftt1*ldz+bl_begin], RRR->D, 
	       bl_size*sizeof(double));
	memcpy(&Z[new_ftt2*ldz+bl_begin], RRR->L, 
	       bl_size*sizeof(double));

	RRR_parent = PMR_create_rrr(&Z[new_ftt1*ldz + bl_begin],
				    &Z[new_ftt2*ldz + bl_begin],
				    NULL, NULL, bl_size, depth);
      }
      
      /* Create the task for the cluster and put it in the queue */ 
      task = PMR_create_c_task(new_first, new_last, depth+1, 
			       bl_begin, bl_end, bl_spdiam, lgap, 
			       cl->proc_W_begin, cl->proc_W_end, 
			       new_lpid, new_rpid, RRR_parent);

      if (new_lpid != new_rpid)
	PMR_insert_task_at_back(workQ->r_queue, task);
      else
	PMR_insert_task_at_back(workQ->c_queue, task);

      task_inserted = true;
      
    } /* if singleton or cluster found */

    new_first = i + 1;
  } /* end i */
  
  /* set flag in RRR that last singleton is created */
  PMR_set_parent_processed_flag(RRR);
  
  /* clean up */
  PMR_try_destroy_rrr(RRR);
  free(cl);

  return 0;
} /* end create_subtasks */
示例#3
0
/*
 * Initialize work queue by putting all tasks for the process
 * into the work queue.
 */
static 
int init_workQ
(proc_t *procinfo, in_t *Dstruct, val_t *Wstruct, int *nzp, workQ_t *workQ)
{
  int              pid      = procinfo->pid;
  int              nproc    = procinfo->nproc;
  int              nthreads = procinfo->nthreads;
  double *restrict D        = Dstruct->D;
  double *restrict L        = Dstruct->E;
  int              nsplit   = Dstruct->nsplit;
  int    *restrict isplit   = Dstruct->isplit;
  double *restrict W        = Wstruct->W;
  double *restrict Werr     = Wstruct->Werr;
  double *restrict Wgap     = Wstruct->Wgap;
  int    *restrict iproc    = Wstruct->iproc;
  double *restrict Wshifted = Wstruct->Wshifted;
  double *restrict gersch   = Wstruct->gersch;
  int              nz       = *nzp;

  /* Loop over blocks */
  int i, j, k, l;
  int ibegin  = 0;
  for ( j=0; j<nsplit; j++ ) {
    int iend = isplit[j] - 1;
    int isize = iend - ibegin + 1;
    double sigma = L[iend];

    /* Use Gerschgorin disks to find spectral diameter */
    double gl = gersch[2*ibegin    ];
    double gu = gersch[2*ibegin + 1];
    for (i=ibegin+1; i<iend; i++) {
      gl = fmin(gl, gersch[2*i    ]);
      gu = fmax(gu, gersch[2*i + 1]);
    }
    double spdiam = gu - gl;
    double avggap = spdiam / (isize-1);

    /* Find eigenvalues in block */
    int nbl = 0;
    int iWbegin = iend   + 1;
    int iWend   = ibegin - 1;
    for (i=ibegin; i<=iend; i++) {
      if (nbl == 0 && iproc[i] == pid) {
        iWbegin = i;
        iWend   = i;
        nbl++;
        k = i+1;
        while (k <=iend && iproc[k] == pid) {
          iWend++;
          nbl++;
          k++;
        }
        /* iWend = iWbegin + nbl - 1; instead of incrementing in loop */
      }
    }

    /* If no eigenvalues for process in block continue */
    if (nbl == 0) {
      ibegin = iend + 1;
      continue;
    }

    /* Compute DL and DLL for later computation of singletons
     * (freed when all singletons of root are computed) */
    double *DL = (double*)malloc(isize*sizeof(double));
    assert(DL != NULL);
    
    double *DLL = (double*)malloc(isize*sizeof(double));
    assert(DLL != NULL);

    for (i=0; i<isize-1; i++) {
      double tmp = D[i+ibegin]*L[i+ibegin];
      DL[i]      = tmp;
      DLL[i]     = tmp*L[i+ibegin];
    }

    rrr_t *RRR = PMR_create_rrr(&D[ibegin], &L[ibegin], DL, DLL, isize, 0);
    PMR_increment_rrr_dependencies(RRR);
    
    /* In W apply shift of current block to eigenvalues
     * to get unshifted values w.r.t. T */
    for (i=ibegin; i<=iend; i++)
      W[i] += sigma;

    /* Split eigenvalues of block into singletons and clusters
     * and add them to process work queue */
    int max_size = imax(1, nz/nthreads);
    bool task_inserted = false;
    int new_first=ibegin, new_last;
    int sn_first, sn_last, sn_size;
    for (i=ibegin; i<=iend; i++) {
      if (i == iend)
        new_last = i;
      else if (Wgap[i] >= MIN_RELGAP*fabs(Wshifted[i]))
        new_last = i;
      else
        continue;

      /* Skip rest if no eigenvalues of process */
      if (new_first > iWend || new_last < iWbegin) {
        new_first = i + 1;
        continue;
      }

      int new_size = new_last - new_first + 1;
      
      if (new_size == 1) {
        /* Singleton was found */
        if (new_first < iWbegin || new_first > iWend) {
          new_first = i + 1;
          continue;
        } else {
          if (new_first==iWbegin || task_inserted==true) {
            /* Initialize new singleton task */
            sn_first = new_first;
            sn_last  = new_first;
            sn_size  = 1;
          } else {
            /* Extend singleton task by one */
            sn_last++;
            sn_size++;
          }
        }

        /* Insert task if ... */
        if (i==iWend || sn_size>=max_size ||
            Wgap[i+1] < MIN_RELGAP*fabs(Wshifted[i+1])) {

          double lgap;
          if (sn_first == ibegin) {
            lgap = fmax(0.0, W[ibegin] - Werr[ibegin] - gl );
          } else {
            lgap = Wgap[sn_first-1];
          }

          PMR_increment_rrr_dependencies(RRR);

          task_t *task = 
            PMR_create_s_task
            (sn_first, sn_last, 1, ibegin, iend, spdiam, lgap, RRR);
          
          PMR_insert_task_at_back(workQ->s_queue, task);
          
          task_inserted = true;
        } else {
          task_inserted = false;
        }
      } else {
        /* Cluster was found */
        int cl_first = new_first;
        int cl_last  = new_last;
        int cl_size  = new_size;

        /* Split cluster into clusters by absolut criterion */
        if (cl_size > 3) {
          /* Split cluster to smaller clusters [cl_first:cl_last] */
          for (k=new_first+1; k<new_last; k++) {
            if (k == new_last-1)
              cl_last = new_last;
            else if (k != cl_first && Wgap[k] > 0.8*avggap)
              cl_last = k;
            else
              continue;

            /* Skip cluster if no eigenvalues of process in it */
            if (cl_last < iWbegin || cl_first > iWend) {
              cl_first = k + 1;
              continue;
            }

            /* Record left gap of cluster */
            double lgap;
            if (cl_first == ibegin) {
              lgap = fmax(0.0, W[ibegin] - Werr[ibegin] - gl);
            } else {
              lgap = Wgap[cl_first-1];
            }

            /* Determine processes involved in processing the cluster */
            int left_pid  = nproc-1;
            int right_pid = 0;
            for (l=cl_first; l<=cl_last; l++) {
              if (iproc[l] != -1) {
                left_pid  = imin(left_pid,  iproc[l]);
                right_pid = imax(right_pid, iproc[l]);
              }
            }

            rrr_t *RRR_parent = 
              PMR_create_rrr(&D[ibegin], &L[ibegin], NULL, NULL, isize, 0);

            task_t *task = 
              PMR_create_c_task
              (cl_first, cl_last, 1, ibegin, iend, spdiam, lgap, iWbegin, 
               iWend, left_pid, right_pid, RRR_parent);

            /* Insert task into queue, depending if cluster need
             * communication with other processes */
            if (left_pid != right_pid)
              PMR_insert_task_at_back(workQ->r_queue, task);
            else
              PMR_insert_task_at_back(workQ->c_queue, task);
            
            cl_first = k + 1;
          } /* end k */
        } else {
          /* Cluster is too small to split, so insert it to queue */

          /* Record left gap of cluster */
          double lgap;
          if (cl_first == ibegin) {
            lgap = fmax(0.0, W[ibegin] - Werr[ibegin] - gl );
          } else {
            lgap = Wgap[cl_first-1];
          }

          /* Determine processes involved */
          int left_pid  = nproc-1;
          int right_pid = 0;
          for (l=cl_first; l<=cl_last; l++) {
            if (iproc[l] != -1) {
              left_pid  = imin(left_pid,  iproc[l]);
              right_pid = imax(right_pid, iproc[l]);
            }
          }

          rrr_t *RRR_parent = 
            PMR_create_rrr
            (&D[ibegin], &L[ibegin], NULL, NULL, isize, 0);

          task_t *task = 
            PMR_create_c_task
            (cl_first, cl_last, 1, ibegin, 
             iend, spdiam, lgap, iWbegin, iWend,
             left_pid, right_pid, RRR_parent);

          /* Insert task into queue, depending if cluster need
           * communication with other processes */
          if (left_pid != right_pid)
            PMR_insert_task_at_back(workQ->r_queue, task);
          else
            PMR_insert_task_at_back(workQ->c_queue, task);
        }
        task_inserted = true;
      } /* end new_size */

      new_first = i + 1;
    } /* end of splitting eigenvalues into tasks */

    /* Set flag in RRR that last singleton is created */
    PMR_set_parent_processed_flag(RRR);
    PMR_try_destroy_rrr(RRR);

    ibegin = iend + 1;
  } /* end loop over blocks */ 

  return 0;
}
示例#4
0
/* 
 * Refine eigenvalues with respect to new rrr 
 */
static inline 
int refine_eigvals
(cluster_t *cl, int rf_begin, int rf_end,
 int tid, proc_t *procinfo, rrr_t *RRR, 
 val_t *Wstruct, vec_t *Zstruct,
 tol_t *tolstruct, counter_t *num_left,
 workQ_t *workQ, double *work, int *iwork)
{
  int    rf_size   = rf_end-rf_begin+1;
  int    bl_begin  = cl->bl_begin;
  int    bl_end    = cl->bl_end;
  int    bl_size   = bl_end - bl_begin + 1;
  double bl_spdiam = cl->bl_spdiam;

  double *restrict D   = RRR->D;
  double *restrict L   = RRR->L;
  double *restrict DLL = RRR->DLL;

  double *restrict W         = Wstruct->W;
  double *restrict Werr      = Wstruct->Werr;
  double *restrict Wgap      = Wstruct->Wgap;
  int    *restrict Windex    = Wstruct->Windex;
  double *restrict Wshifted  = Wstruct->Wshifted;

  double pivmin = tolstruct->pivmin;
  double rtol1 = tolstruct->rtol1;
  double rtol2 = tolstruct->rtol2;

  /* Determine if refinement should be split into tasks */
  int left = PMR_get_counter_value(num_left);
  int nz = Zstruct->nz;
  int nthreads = procinfo->nthreads;
  int MIN_REFINE_CHUNK = fmax(2,nz/(4*nthreads));
  int own_part = (int)fmax(ceil((double)left/nthreads),MIN_REFINE_CHUNK);

  int offset, i, p, q;
  double savegap;
  task_t *task;
  if (own_part < rf_size) {
    int others_part = rf_size - own_part;
    int num_tasks   = iceil(rf_size, own_part) - 1; /* >1 */
    int chunk       = others_part/num_tasks;        /* floor */

    int ts_begin=rf_begin, ts_end;
    p = Windex[rf_begin];
    for (i=0; i<num_tasks; i++) {
      ts_end = ts_begin + chunk - 1;
      q      = p        + chunk - 1;

      task = 
        PMR_create_r_task
        (ts_begin, ts_end, D, DLL, p, q, bl_size, bl_spdiam, tid);
     
      if (ts_begin <= ts_end)
	PMR_insert_task_at_back(workQ->r_queue, task);
      else
        PMR_refine_sem_post(task->data); /* case chunk=0 */

      ts_begin = ts_end + 1;
      p        = q      + 1;
    }
    ts_end = rf_end;
    q      = Windex[rf_end];
    offset = Windex[ts_begin] - 1;

    /* Call bisection routine to refine the values */
    if (ts_begin <= ts_end) {
      int info;
      odrrb
      (&bl_size, D, DLL, &p, &q, &rtol1, &rtol2, &offset, 
       &Wshifted[ts_begin], &Wgap[ts_begin], &Werr[ts_begin],
       work, iwork, &pivmin, &bl_spdiam, &bl_size, &info);
      assert( info == 0 );
    }

    /* Empty "all" r-queue refine tasks before waiting */
    int num_iter = PMR_get_num_tasks(workQ->r_queue);
    for (i=0; i<num_iter; i++) {
      task = PMR_remove_task_at_front(workQ->r_queue);
      if (task != NULL) {
	if (task->flag == REFINE_TASK_FLAG) {
	  PMR_process_r_task
          ((refine_t*)task->data, procinfo, Wstruct, tolstruct, work, iwork);
	  free(task);
	} else {
	  PMR_insert_task_at_back(workQ->r_queue, task);
	}
      } /* if task */
    } /* end for i */
    
    /* Barrier: wait until all created tasks finished */
    int count = num_tasks;
    while (count > 0) {
      while ( PMR_refine_sem_wait(task->data) != 0 ) { };
      count--;
    }
    PMR_refine_sem_destroy(task->data);

    /* Edit right gap at splitting point */
    ts_begin = rf_begin;
    for (i=0; i<num_tasks; i++) {
      ts_end = ts_begin + chunk - 1;
      
      Wgap[ts_end] = fmax(0.0, Wshifted[ts_end + 1] - Werr[ts_end + 1]
			  - Wshifted[ts_end] - Werr[ts_end]);
      
      ts_begin = ts_end + 1;
    }
  } else {
    /* Refinement of cluster without creating tasks */
    
    /* 'p' and 'q' are local (within block) indices of
     * the first/last eigenvalue of the cluster */
    p = Windex[rf_begin];
    q = Windex[rf_end];
    
    offset = Windex[rf_begin] - 1;    /* = p - 1 */
    
    if (p == q) {
      savegap = Wgap[rf_begin];
      Wgap[rf_begin] = 0.0;
    }  
    
    /* Bisection routine to refine the values */
    int info;
    odrrb
    (&bl_size, D, DLL, &p, &q, &rtol1, &rtol2, &offset, 
     &Wshifted[rf_begin], &Wgap[rf_begin], &Werr[rf_begin],
     work, iwork, &pivmin, &bl_spdiam, &bl_size, &info);
    assert( info == 0 );
    
    if (p == q)
      Wgap[rf_begin] = savegap;
  } /* end refine with or without creating tasks */

  /* refined eigenvalues with all shifts applied in W */
  double sigma = L[bl_size-1];
  for (i=rf_begin; i<=rf_end; i++)
    W[i] = Wshifted[i] + sigma;

  return 0;
} /* end refine_eigvals */
示例#5
0
文件: mrrr_vec.c 项目: HPAC/mr3smp
static inline void init_workQ(workQ_t *workQ, in_t *Dstruct, 
			      val_t *Wstruct)
{
  double *restrict D      = Dstruct->D;
  double *restrict L      = Dstruct->E;
  int    *restrict isplit = Dstruct->isplit;
  int              m      = Wstruct->m;
  double           *vlp   = Wstruct->vlp;
  double *restrict W      = Wstruct->W;
  double *restrict Werr   = Wstruct->Werr;
  int    *restrict iblock = Wstruct->iblock;

  int              j;
  int              begin ,  end;
  int              Wbegin, Wend;
  int              nbl;
  double           sigma;
  rrr_t            *RRR;
  double           lgap;
  task_t           *task;

  /* For every unreducible block of the matrix create a task
   * and put it in the queue */
  begin  = 0;
  Wbegin = 0;

  for (j=0; j<Dstruct->nsplit; j++) {

    end   = isplit[j] - 1;
    sigma = L[end];

    Wend = Wbegin-1;
    while (Wend < m-1 && iblock[Wend + 1] == j+1) {
	Wend++;
    }

    if (Wend < Wbegin) {
      begin = end + 1; 
      continue;
    }

    nbl = end - begin  + 1;

    RRR = PMR_create_rrr(&D[begin], &L[begin], NULL, NULL, nbl, -1);
  
    if (nbl == 1) {
      /* To make sure that RRR is freed when s-task is processed */
      PMR_increment_rrr_dependencies(RRR);
      PMR_set_parent_processed_flag(RRR);

      task = PMR_create_s_task(Wbegin, Wbegin, 1, begin, end, Wbegin, 
			       Wend, 0, 0, RRR);
      PMR_insert_task_at_back(workQ->s_queue, task);
      
      begin  = end  + 1;
      Wbegin = Wend + 1;
      continue;
    }

    lgap = fmax(0.0, (W[Wbegin]+sigma) - Werr[Wbegin] - (*vlp) );

    task = PMR_create_c_task(Wbegin, Wend, 0, begin, end, Wbegin, 
			     Wend, 0, lgap, RRR);
  
    PMR_insert_task_at_back(workQ->c_queue, task);

    begin  = end  + 1;
    Wbegin = Wend + 1;
  }
  /* end of loop over block */
}