Пример #1
0
int PMR_process_s_task(singleton_t *sng, int tid, proc_t *procinfo,
		       val_t *Wstruct, vec_t *Zstruct, 
		       tol_t *tolstruct, counter_t *num_left, 
		       double *work, int *iwork)
{
  /* Inputs */
  int    begin         = sng->begin; 
  int    end           = sng->end;
  int    bl_begin      = sng->bl_begin;
  int    bl_end        = sng->bl_end;
  int    bl_size       = bl_end - bl_begin + 1;
  double bl_spdiam     = sng->bl_spdiam; 
  rrr_t  *RRR          = sng->RRR;
  double *restrict D   = RRR->D; 
  double *restrict L   = RRR->L; 
  double *restrict DL  = RRR->DL;
  double *restrict DLL = RRR->DLL;

  int              pid      = procinfo->pid;  
  int              n        = Wstruct->n;
  double *restrict W        = Wstruct->W;
  double *restrict Werr     = Wstruct->Werr;
  double *restrict Wgap     = Wstruct->Wgap;
  int    *restrict Windex   = Wstruct->Windex;  
  int    *restrict iproc    = Wstruct->iproc;  
  double *restrict Wshifted = Wstruct->Wshifted;
  int              ldz      = Zstruct->ldz;
  double *restrict Z        = Zstruct->Z;
  int    *restrict isuppZ   = Zstruct->Zsupp;;
  int    *restrict Zindex   = Zstruct->Zindex;
  double           pivmin   = tolstruct->pivmin;

  /* others */
  int              info, i, k, itmp, num_decrement=0;
  int              IONE = 1;
  double           DZERO = 0.0;
  double           tol, lambda, left, right;
  int              i_local, zind;
  double           gap, lgap, rgap, gaptol, savedgap, tmp;
  int             usedBS, usedRQ, needBS, wantNC, step2II;
  int              r, offset;
  double           twoeps = 2*DBL_EPSILON, RQtol = 2*DBL_EPSILON;
  double           residual, bstres, bstw; 
  int              i_supmn, i_supmx;
  double           RQcorr;
  int              negcount;
  int              sgndef, suppsize;
  double           sigma;
  int              i_Zfrom, i_Zto;
  double           ztz, norminv, mingma;


  /* set tolerance parameter */
  tol  = 4.0 * log( (double) bl_size ) * DBL_EPSILON;

  /* loop over all singletons in the task */
  for (i=begin; i<=end; i++) {

    /* check if eigenvector is supposed to be computed by
     * the process */
    if (iproc[i] != pid)
      continue;
    num_decrement++;

    if (bl_size == 1) {
      /* set eigenvector to column of identity matrix */
      zind = Zindex[i];
      memset(&Z[zind*ldz], 0.0, n*sizeof(double) );
      Z[zind*ldz + bl_begin] = 1.0;
      isuppZ[2*zind    ]     = bl_begin + 1;
      isuppZ[2*zind + 1]     = bl_begin + 1;
      continue;
    }

    lambda  = Wshifted[i];  
    left    = Wshifted[i] - Werr[i];
    right   = Wshifted[i] + Werr[i];
    i_local = Windex[i];
    r       = 0;
    
    /* compute left and right gap */
    if (i == bl_begin)
      lgap = DBL_EPSILON * fmax( fabs(left), fabs(right) );
    else if (i == begin)
      lgap = sng->lgap;
    else
      lgap = Wgap[i-1];

    if (i == bl_end) {
      rgap = DBL_EPSILON * fmax( fabs(left), fabs(right) );
    } else {
      rgap = Wgap[i];
    }

    gap = fmin(lgap, rgap);

    if ( i == bl_begin || i == bl_end ) {
      gaptol = 0.0;
    } else {
      gaptol = gap * DBL_EPSILON;
    }

    /* initialize lower and upper value of support */
    i_supmn = bl_size;
    i_supmx = 1;

    /* update Wgap so that it holds minimum gap and save the 
     * old value */
    savedgap  = Wgap[i];
    Wgap[i]   = gap;
    
    /* initialize flags indicating if bisection or Rayleigh-Quotient
     * correction was used */
    usedBS = false;
    usedRQ = false;
  
    /* the need for bisection is initially turned off */
    needBS = !TRY_RQC;

    /* IEEE floating point is assumed, so that all 0 bits are 0.0 */
    zind = Zindex[i];
    memset(&Z[zind*ldz], 0.0, n*sizeof(double));

    /* inverse iteration with twisted factorization */
    for (k=1; k<=MAXITER; k++) {

      if (needBS == true) {
	usedBS = true;
	itmp   = r;
	
	offset  = Windex[i] - 1;
	tmp     = Wgap[i]; 
	Wgap[i] = 0.0;
	
	odrrb_(&bl_size, D, DLL, &i_local, &i_local, &DZERO, 
		&twoeps, &offset, &Wshifted[i], &Wgap[i],
		&Werr[i], work, iwork, &pivmin, &bl_spdiam,
		&itmp, &info);
	assert(info == 0);
	
	Wgap[i] = tmp;
	lambda = Wshifted[i];
	r = 0;
      }
      wantNC = (usedBS == true) ? false : true;

      /* compute the eigenvector corresponding to lambda */
      odr1v_(&bl_size, &IONE, &bl_size, &lambda, D, L, DL, DLL,
	      &pivmin, &gaptol, &Z[zind*ldz+bl_begin], &wantNC,
	      &negcount, &ztz, &mingma, &r, &isuppZ[2*zind],
	      &norminv, &residual, &RQcorr, work);

      if (k == 1) {
	bstres = residual;
	bstw   = lambda;
      } else if (residual < bstres) {
	bstres = residual;
	bstw   = lambda;
      }
      
      /* update support held */
      i_supmn = imin(i_supmn, isuppZ[2*zind    ]);
      i_supmx = imax(i_supmx, isuppZ[2*zind + 1]);

      /* Convergence test for Rayleigh Quotient Iteration
       * not done if bisection was used */
      if ( !usedBS && residual > tol*gap 
	   && fabs(RQcorr) > RQtol*fabs(lambda) ) {
      
	if (i_local <= negcount) {
	  sgndef = -1;    /* wanted eigenvalue lies to the left  */
	} else {
	  sgndef =  1;    /* wanted eigenvalue lies to the right */
	}
	
	if ( RQcorr*sgndef >= 0.0
	     && lambda+RQcorr <= right 
	     && lambda+RQcorr >= left ) {
	  usedRQ = true;
	  if ( sgndef == 1 )
	    left  = lambda;
	  else
	    right = lambda;
	  Wshifted[i] = 0.5*(left + right);
	  lambda     += RQcorr;
	} else { /* bisection is needed */
	  needBS = true;
	}
	
	if ( right-left < RQtol*fabs(lambda) ) {
	  /* eigenvalue computed to bisection accuracy
	   * => compute eigenvector */
	  usedBS = true;
	} else if ( k == MAXITER-1 ) {
	  /* for last iteration use bisection */
	  needBS = true;
	}
      } else {
	/* go to next iteration */
	break;
      }

    } /* end k */

    /* if necessary call odr1v to improve error angle by 2nd step */
    step2II = false;
    if ( usedRQ && usedBS && (bstres <= residual) ) {
      lambda = bstw;
      step2II = true;
    }
    if ( step2II == true ) {
      odr1v_(&bl_size, &IONE, &bl_size, &lambda, D, L, DL, DLL,
	      &pivmin, &gaptol, &Z[zind*ldz+bl_begin], &wantNC,
	      &negcount, &ztz, &mingma, &r, &isuppZ[2*zind],
	      &norminv, &residual, &RQcorr, work);
    }
    Wshifted[i] = lambda;

    /* compute support w.r.t. whole matrix
     * block beginning is offset for each support */
    isuppZ[2*zind    ] += bl_begin;
    isuppZ[2*zind + 1] += bl_begin;
  
    /* ensure vector is okay if support changed in RQI 
     * minus ones because of indices starting from zero */
    i_Zfrom    = isuppZ[2*zind    ] - 1;
    i_Zto      = isuppZ[2*zind + 1] - 1;
    i_supmn   += bl_begin - 1;
    i_supmx   += bl_begin - 1;
    if ( i_supmn < i_Zfrom ) {
      for ( k=i_supmn; k < i_Zfrom; k++ ) {
	Z[k + zind*ldz] = 0.0;
      }
    }
    if ( i_supmx > i_Zto ) {
      for ( k=i_Zto+1; k <= i_supmx; k++ ) {
	Z[k + zind*ldz] = 0.0;
      }
    }
    
    /* normalize eigenvector */
    suppsize = i_Zto - i_Zfrom + 1;
    odscl_(&suppsize, &norminv, &Z[i_Zfrom + zind*ldz], &IONE);

    sigma = L[bl_size-1];
    W[i]  = lambda + sigma;
    
    if (i < end)
      Wgap[i] = fmax(savedgap, W[i+1]-Werr[i+1] - W[i]-Werr[i]);

  } /* end i */

  /* decrement counter */
  PMR_decrement_counter(num_left, num_decrement);

  /* clean up */
  free(sng);
  PMR_try_destroy_rrr(RRR);

  return(0);
}
Пример #2
0
/* TODO: Refactor this routine */
static inline 
int create_subtasks
(cluster_t *cl, int tid, proc_t *procinfo, 
 rrr_t *RRR, val_t *Wstruct, vec_t *Zstruct,
 workQ_t *workQ, counter_t *num_left)
{
  /* From inputs */
  int              cl_begin  = cl->begin;
  int              cl_end    = cl->end;
  int              depth     = cl->depth;
  int              bl_begin  = cl->bl_begin;
  int              bl_end    = cl->bl_end;
  int              bl_size   = bl_end - bl_begin + 1;
  double           bl_spdiam = cl->bl_spdiam;
  double           lgap;

  int  pid       = procinfo->pid;
  int  nproc     = procinfo->nproc;
  int  nthreads  = procinfo->nthreads;
  bool proc_involved=true;

  double *restrict Wgap      = Wstruct->Wgap;
  double *restrict Wshifted  = Wstruct->Wshifted;
  int    *restrict iproc     = Wstruct->iproc;

  int              ldz       = Zstruct->ldz;
  double *restrict Z         = Zstruct->Z;
  int    *restrict Zindex    = Zstruct->Zindex;

  /* others */
  int    i, l, k;
  int    max_size;
  task_t *task;
  bool   task_inserted;
  int    new_first, new_last, new_size, new_ftt1, new_ftt2;
  int    sn_first, sn_last, sn_size;
  rrr_t  *RRR_parent;
  int    new_lpid, new_rpid;
  double *restrict D_parent;
  double *restrict L_parent;
  int    my_first, my_last;
  bool   copy_parent_rrr;


  max_size = fmax(1, PMR_get_counter_value(num_left) /
		     (fmin(depth+1,4)*nthreads) );
  task_inserted = true;
  new_first = cl_begin;
  for (i=cl_begin; i<=cl_end; i++) {    

    if ( i == cl_end )
      new_last = i;
    else if ( Wgap[i] >= MIN_RELGAP*fabs(Wshifted[i]) )
      new_last = i;
    else
      continue;

    new_size = new_last - new_first + 1;

    if (new_size == 1) {
      /* singleton was found */
      
      if (new_first==cl_begin || task_inserted==true) {
	/* initialize new singleton task */
	sn_first = new_first;
	sn_last  = new_first;
	sn_size  = 1;
      } else {
	/* extend singleton task by one */
	sn_last++;
	sn_size++;
      }
      
      /* insert task if ... */
      if (i==cl_end || sn_size>=max_size ||
	    Wgap[i+1] < MIN_RELGAP*fabs(Wshifted[i+1])) {

	/* Check if process involved in s-task */
	proc_involved = false;
	for (k=sn_first; k<=sn_last; k++) {
	  if (iproc[k] == pid) {
	    proc_involved = true;
	    break;
	  }
	}
	if (proc_involved == false) {
	  task_inserted = true;
	  new_first = i + 1;
	  continue;
	}

	/* Insert task as process is involved */
	if (sn_first == cl_begin) {
	  lgap = cl->lgap;
	} else {
	  lgap = Wgap[sn_first-1];
	}
	
	PMR_increment_rrr_dependencies(RRR);
	
	task = PMR_create_s_task(sn_first, sn_last, depth+1, bl_begin,
				 bl_end, bl_spdiam, lgap, RRR);
	
	PMR_insert_task_at_back(workQ->s_queue, task);
	  
	task_inserted = true;
      } else {
	task_inserted = false;
      }
      
    } else {
      /* cluster was found */

      /* check if process involved in processing the new cluster */
      new_lpid = nproc-1;
      new_rpid = -1;
      for (l=new_first; l<=new_last; l++) {
	if (iproc[l] != -1) {
	  new_lpid = imin(new_lpid, iproc[l]);
	  new_rpid = imax(new_rpid, iproc[l]);
	  }
      }
      if (new_lpid > pid || new_rpid < pid) {
	task_inserted = true;
	new_first = i + 1;
	continue;
      }

      /* find gap to the left */
      if (new_first == cl_begin) {
	lgap = cl->lgap;
      } else {
	lgap = Wgap[new_first - 1];
      }
    
      /* determine where to store the parent rrr needed by the
       * cluster to find its new rrr */
      my_first = imax(new_first, cl->proc_W_begin);
      my_last  = imin(new_last,  cl->proc_W_end);
      if ( my_first == my_last ) {
	/* only one eigenvalue of cluster belongs to process */
	copy_parent_rrr = true;
      } else {
	/* store parent rrr in Z at column new_ftt */
	copy_parent_rrr = false;
      }
      new_ftt1 = Zindex[my_first    ];
      new_ftt2 = Zindex[my_first + 1];

      if (copy_parent_rrr == true) {
	/* Copy parent RRR into alloceted arrays and mark them
	 * for freeing later */
	D_parent = (double *) malloc(bl_size * sizeof(double));
	assert(D_parent != NULL);
	
	L_parent = (double *) malloc(bl_size * sizeof(double));
	assert(L_parent != NULL);

	memcpy(D_parent, RRR->D, bl_size*sizeof(double));
	memcpy(L_parent, RRR->L, bl_size*sizeof(double));

	RRR_parent = PMR_create_rrr(D_parent, L_parent, NULL, 
				    NULL, bl_size, depth);
	PMR_set_copied_parent_rrr_flag(RRR_parent, true);

      } else {
	/* copy parent RRR into Z to make cluster task independent */
	memcpy(&Z[new_ftt1*ldz+bl_begin], RRR->D, 
	       bl_size*sizeof(double));
	memcpy(&Z[new_ftt2*ldz+bl_begin], RRR->L, 
	       bl_size*sizeof(double));

	RRR_parent = PMR_create_rrr(&Z[new_ftt1*ldz + bl_begin],
				    &Z[new_ftt2*ldz + bl_begin],
				    NULL, NULL, bl_size, depth);
      }
      
      /* Create the task for the cluster and put it in the queue */ 
      task = PMR_create_c_task(new_first, new_last, depth+1, 
			       bl_begin, bl_end, bl_spdiam, lgap, 
			       cl->proc_W_begin, cl->proc_W_end, 
			       new_lpid, new_rpid, RRR_parent);

      if (new_lpid != new_rpid)
	PMR_insert_task_at_back(workQ->r_queue, task);
      else
	PMR_insert_task_at_back(workQ->c_queue, task);

      task_inserted = true;
      
    } /* if singleton or cluster found */

    new_first = i + 1;
  } /* end i */
  
  /* set flag in RRR that last singleton is created */
  PMR_set_parent_processed_flag(RRR);
  
  /* clean up */
  PMR_try_destroy_rrr(RRR);
  free(cl);

  return 0;
} /* end create_subtasks */
Пример #3
0
/*
 * Initialize work queue by putting all tasks for the process
 * into the work queue.
 */
static 
int init_workQ
(proc_t *procinfo, in_t *Dstruct, val_t *Wstruct, int *nzp, workQ_t *workQ)
{
  int              pid      = procinfo->pid;
  int              nproc    = procinfo->nproc;
  int              nthreads = procinfo->nthreads;
  double *restrict D        = Dstruct->D;
  double *restrict L        = Dstruct->E;
  int              nsplit   = Dstruct->nsplit;
  int    *restrict isplit   = Dstruct->isplit;
  double *restrict W        = Wstruct->W;
  double *restrict Werr     = Wstruct->Werr;
  double *restrict Wgap     = Wstruct->Wgap;
  int    *restrict iproc    = Wstruct->iproc;
  double *restrict Wshifted = Wstruct->Wshifted;
  double *restrict gersch   = Wstruct->gersch;
  int              nz       = *nzp;

  /* Loop over blocks */
  int i, j, k, l;
  int ibegin  = 0;
  for ( j=0; j<nsplit; j++ ) {
    int iend = isplit[j] - 1;
    int isize = iend - ibegin + 1;
    double sigma = L[iend];

    /* Use Gerschgorin disks to find spectral diameter */
    double gl = gersch[2*ibegin    ];
    double gu = gersch[2*ibegin + 1];
    for (i=ibegin+1; i<iend; i++) {
      gl = fmin(gl, gersch[2*i    ]);
      gu = fmax(gu, gersch[2*i + 1]);
    }
    double spdiam = gu - gl;
    double avggap = spdiam / (isize-1);

    /* Find eigenvalues in block */
    int nbl = 0;
    int iWbegin = iend   + 1;
    int iWend   = ibegin - 1;
    for (i=ibegin; i<=iend; i++) {
      if (nbl == 0 && iproc[i] == pid) {
        iWbegin = i;
        iWend   = i;
        nbl++;
        k = i+1;
        while (k <=iend && iproc[k] == pid) {
          iWend++;
          nbl++;
          k++;
        }
        /* iWend = iWbegin + nbl - 1; instead of incrementing in loop */
      }
    }

    /* If no eigenvalues for process in block continue */
    if (nbl == 0) {
      ibegin = iend + 1;
      continue;
    }

    /* Compute DL and DLL for later computation of singletons
     * (freed when all singletons of root are computed) */
    double *DL = (double*)malloc(isize*sizeof(double));
    assert(DL != NULL);
    
    double *DLL = (double*)malloc(isize*sizeof(double));
    assert(DLL != NULL);

    for (i=0; i<isize-1; i++) {
      double tmp = D[i+ibegin]*L[i+ibegin];
      DL[i]      = tmp;
      DLL[i]     = tmp*L[i+ibegin];
    }

    rrr_t *RRR = PMR_create_rrr(&D[ibegin], &L[ibegin], DL, DLL, isize, 0);
    PMR_increment_rrr_dependencies(RRR);
    
    /* In W apply shift of current block to eigenvalues
     * to get unshifted values w.r.t. T */
    for (i=ibegin; i<=iend; i++)
      W[i] += sigma;

    /* Split eigenvalues of block into singletons and clusters
     * and add them to process work queue */
    int max_size = imax(1, nz/nthreads);
    bool task_inserted = false;
    int new_first=ibegin, new_last;
    int sn_first, sn_last, sn_size;
    for (i=ibegin; i<=iend; i++) {
      if (i == iend)
        new_last = i;
      else if (Wgap[i] >= MIN_RELGAP*fabs(Wshifted[i]))
        new_last = i;
      else
        continue;

      /* Skip rest if no eigenvalues of process */
      if (new_first > iWend || new_last < iWbegin) {
        new_first = i + 1;
        continue;
      }

      int new_size = new_last - new_first + 1;
      
      if (new_size == 1) {
        /* Singleton was found */
        if (new_first < iWbegin || new_first > iWend) {
          new_first = i + 1;
          continue;
        } else {
          if (new_first==iWbegin || task_inserted==true) {
            /* Initialize new singleton task */
            sn_first = new_first;
            sn_last  = new_first;
            sn_size  = 1;
          } else {
            /* Extend singleton task by one */
            sn_last++;
            sn_size++;
          }
        }

        /* Insert task if ... */
        if (i==iWend || sn_size>=max_size ||
            Wgap[i+1] < MIN_RELGAP*fabs(Wshifted[i+1])) {

          double lgap;
          if (sn_first == ibegin) {
            lgap = fmax(0.0, W[ibegin] - Werr[ibegin] - gl );
          } else {
            lgap = Wgap[sn_first-1];
          }

          PMR_increment_rrr_dependencies(RRR);

          task_t *task = 
            PMR_create_s_task
            (sn_first, sn_last, 1, ibegin, iend, spdiam, lgap, RRR);
          
          PMR_insert_task_at_back(workQ->s_queue, task);
          
          task_inserted = true;
        } else {
          task_inserted = false;
        }
      } else {
        /* Cluster was found */
        int cl_first = new_first;
        int cl_last  = new_last;
        int cl_size  = new_size;

        /* Split cluster into clusters by absolut criterion */
        if (cl_size > 3) {
          /* Split cluster to smaller clusters [cl_first:cl_last] */
          for (k=new_first+1; k<new_last; k++) {
            if (k == new_last-1)
              cl_last = new_last;
            else if (k != cl_first && Wgap[k] > 0.8*avggap)
              cl_last = k;
            else
              continue;

            /* Skip cluster if no eigenvalues of process in it */
            if (cl_last < iWbegin || cl_first > iWend) {
              cl_first = k + 1;
              continue;
            }

            /* Record left gap of cluster */
            double lgap;
            if (cl_first == ibegin) {
              lgap = fmax(0.0, W[ibegin] - Werr[ibegin] - gl);
            } else {
              lgap = Wgap[cl_first-1];
            }

            /* Determine processes involved in processing the cluster */
            int left_pid  = nproc-1;
            int right_pid = 0;
            for (l=cl_first; l<=cl_last; l++) {
              if (iproc[l] != -1) {
                left_pid  = imin(left_pid,  iproc[l]);
                right_pid = imax(right_pid, iproc[l]);
              }
            }

            rrr_t *RRR_parent = 
              PMR_create_rrr(&D[ibegin], &L[ibegin], NULL, NULL, isize, 0);

            task_t *task = 
              PMR_create_c_task
              (cl_first, cl_last, 1, ibegin, iend, spdiam, lgap, iWbegin, 
               iWend, left_pid, right_pid, RRR_parent);

            /* Insert task into queue, depending if cluster need
             * communication with other processes */
            if (left_pid != right_pid)
              PMR_insert_task_at_back(workQ->r_queue, task);
            else
              PMR_insert_task_at_back(workQ->c_queue, task);
            
            cl_first = k + 1;
          } /* end k */
        } else {
          /* Cluster is too small to split, so insert it to queue */

          /* Record left gap of cluster */
          double lgap;
          if (cl_first == ibegin) {
            lgap = fmax(0.0, W[ibegin] - Werr[ibegin] - gl );
          } else {
            lgap = Wgap[cl_first-1];
          }

          /* Determine processes involved */
          int left_pid  = nproc-1;
          int right_pid = 0;
          for (l=cl_first; l<=cl_last; l++) {
            if (iproc[l] != -1) {
              left_pid  = imin(left_pid,  iproc[l]);
              right_pid = imax(right_pid, iproc[l]);
            }
          }

          rrr_t *RRR_parent = 
            PMR_create_rrr
            (&D[ibegin], &L[ibegin], NULL, NULL, isize, 0);

          task_t *task = 
            PMR_create_c_task
            (cl_first, cl_last, 1, ibegin, 
             iend, spdiam, lgap, iWbegin, iWend,
             left_pid, right_pid, RRR_parent);

          /* Insert task into queue, depending if cluster need
           * communication with other processes */
          if (left_pid != right_pid)
            PMR_insert_task_at_back(workQ->r_queue, task);
          else
            PMR_insert_task_at_back(workQ->c_queue, task);
        }
        task_inserted = true;
      } /* end new_size */

      new_first = i + 1;
    } /* end of splitting eigenvalues into tasks */

    /* Set flag in RRR that last singleton is created */
    PMR_set_parent_processed_flag(RRR);
    PMR_try_destroy_rrr(RRR);

    ibegin = iend + 1;
  } /* end loop over blocks */ 

  return 0;
}