Esempio n. 1
0
/* TODO: Refactor this routine */
static inline 
int create_subtasks
(cluster_t *cl, int tid, proc_t *procinfo, 
 rrr_t *RRR, val_t *Wstruct, vec_t *Zstruct,
 workQ_t *workQ, counter_t *num_left)
{
  /* From inputs */
  int              cl_begin  = cl->begin;
  int              cl_end    = cl->end;
  int              depth     = cl->depth;
  int              bl_begin  = cl->bl_begin;
  int              bl_end    = cl->bl_end;
  int              bl_size   = bl_end - bl_begin + 1;
  double           bl_spdiam = cl->bl_spdiam;
  double           lgap;

  int  pid       = procinfo->pid;
  int  nproc     = procinfo->nproc;
  int  nthreads  = procinfo->nthreads;
  bool proc_involved=true;

  double *restrict Wgap      = Wstruct->Wgap;
  double *restrict Wshifted  = Wstruct->Wshifted;
  int    *restrict iproc     = Wstruct->iproc;

  int              ldz       = Zstruct->ldz;
  double *restrict Z         = Zstruct->Z;
  int    *restrict Zindex    = Zstruct->Zindex;

  /* others */
  int    i, l, k;
  int    max_size;
  task_t *task;
  bool   task_inserted;
  int    new_first, new_last, new_size, new_ftt1, new_ftt2;
  int    sn_first, sn_last, sn_size;
  rrr_t  *RRR_parent;
  int    new_lpid, new_rpid;
  double *restrict D_parent;
  double *restrict L_parent;
  int    my_first, my_last;
  bool   copy_parent_rrr;


  max_size = fmax(1, PMR_get_counter_value(num_left) /
		     (fmin(depth+1,4)*nthreads) );
  task_inserted = true;
  new_first = cl_begin;
  for (i=cl_begin; i<=cl_end; i++) {    

    if ( i == cl_end )
      new_last = i;
    else if ( Wgap[i] >= MIN_RELGAP*fabs(Wshifted[i]) )
      new_last = i;
    else
      continue;

    new_size = new_last - new_first + 1;

    if (new_size == 1) {
      /* singleton was found */
      
      if (new_first==cl_begin || task_inserted==true) {
	/* initialize new singleton task */
	sn_first = new_first;
	sn_last  = new_first;
	sn_size  = 1;
      } else {
	/* extend singleton task by one */
	sn_last++;
	sn_size++;
      }
      
      /* insert task if ... */
      if (i==cl_end || sn_size>=max_size ||
	    Wgap[i+1] < MIN_RELGAP*fabs(Wshifted[i+1])) {

	/* Check if process involved in s-task */
	proc_involved = false;
	for (k=sn_first; k<=sn_last; k++) {
	  if (iproc[k] == pid) {
	    proc_involved = true;
	    break;
	  }
	}
	if (proc_involved == false) {
	  task_inserted = true;
	  new_first = i + 1;
	  continue;
	}

	/* Insert task as process is involved */
	if (sn_first == cl_begin) {
	  lgap = cl->lgap;
	} else {
	  lgap = Wgap[sn_first-1];
	}
	
	PMR_increment_rrr_dependencies(RRR);
	
	task = PMR_create_s_task(sn_first, sn_last, depth+1, bl_begin,
				 bl_end, bl_spdiam, lgap, RRR);
	
	PMR_insert_task_at_back(workQ->s_queue, task);
	  
	task_inserted = true;
      } else {
	task_inserted = false;
      }
      
    } else {
      /* cluster was found */

      /* check if process involved in processing the new cluster */
      new_lpid = nproc-1;
      new_rpid = -1;
      for (l=new_first; l<=new_last; l++) {
	if (iproc[l] != -1) {
	  new_lpid = imin(new_lpid, iproc[l]);
	  new_rpid = imax(new_rpid, iproc[l]);
	  }
      }
      if (new_lpid > pid || new_rpid < pid) {
	task_inserted = true;
	new_first = i + 1;
	continue;
      }

      /* find gap to the left */
      if (new_first == cl_begin) {
	lgap = cl->lgap;
      } else {
	lgap = Wgap[new_first - 1];
      }
    
      /* determine where to store the parent rrr needed by the
       * cluster to find its new rrr */
      my_first = imax(new_first, cl->proc_W_begin);
      my_last  = imin(new_last,  cl->proc_W_end);
      if ( my_first == my_last ) {
	/* only one eigenvalue of cluster belongs to process */
	copy_parent_rrr = true;
      } else {
	/* store parent rrr in Z at column new_ftt */
	copy_parent_rrr = false;
      }
      new_ftt1 = Zindex[my_first    ];
      new_ftt2 = Zindex[my_first + 1];

      if (copy_parent_rrr == true) {
	/* Copy parent RRR into alloceted arrays and mark them
	 * for freeing later */
	D_parent = (double *) malloc(bl_size * sizeof(double));
	assert(D_parent != NULL);
	
	L_parent = (double *) malloc(bl_size * sizeof(double));
	assert(L_parent != NULL);

	memcpy(D_parent, RRR->D, bl_size*sizeof(double));
	memcpy(L_parent, RRR->L, bl_size*sizeof(double));

	RRR_parent = PMR_create_rrr(D_parent, L_parent, NULL, 
				    NULL, bl_size, depth);
	PMR_set_copied_parent_rrr_flag(RRR_parent, true);

      } else {
	/* copy parent RRR into Z to make cluster task independent */
	memcpy(&Z[new_ftt1*ldz+bl_begin], RRR->D, 
	       bl_size*sizeof(double));
	memcpy(&Z[new_ftt2*ldz+bl_begin], RRR->L, 
	       bl_size*sizeof(double));

	RRR_parent = PMR_create_rrr(&Z[new_ftt1*ldz + bl_begin],
				    &Z[new_ftt2*ldz + bl_begin],
				    NULL, NULL, bl_size, depth);
      }
      
      /* Create the task for the cluster and put it in the queue */ 
      task = PMR_create_c_task(new_first, new_last, depth+1, 
			       bl_begin, bl_end, bl_spdiam, lgap, 
			       cl->proc_W_begin, cl->proc_W_end, 
			       new_lpid, new_rpid, RRR_parent);

      if (new_lpid != new_rpid)
	PMR_insert_task_at_back(workQ->r_queue, task);
      else
	PMR_insert_task_at_back(workQ->c_queue, task);

      task_inserted = true;
      
    } /* if singleton or cluster found */

    new_first = i + 1;
  } /* end i */
  
  /* set flag in RRR that last singleton is created */
  PMR_set_parent_processed_flag(RRR);
  
  /* clean up */
  PMR_try_destroy_rrr(RRR);
  free(cl);

  return 0;
} /* end create_subtasks */
Esempio n. 2
0
/*
 * Initialize work queue by putting all tasks for the process
 * into the work queue.
 */
static 
int init_workQ
(proc_t *procinfo, in_t *Dstruct, val_t *Wstruct, int *nzp, workQ_t *workQ)
{
  int              pid      = procinfo->pid;
  int              nproc    = procinfo->nproc;
  int              nthreads = procinfo->nthreads;
  double *restrict D        = Dstruct->D;
  double *restrict L        = Dstruct->E;
  int              nsplit   = Dstruct->nsplit;
  int    *restrict isplit   = Dstruct->isplit;
  double *restrict W        = Wstruct->W;
  double *restrict Werr     = Wstruct->Werr;
  double *restrict Wgap     = Wstruct->Wgap;
  int    *restrict iproc    = Wstruct->iproc;
  double *restrict Wshifted = Wstruct->Wshifted;
  double *restrict gersch   = Wstruct->gersch;
  int              nz       = *nzp;

  /* Loop over blocks */
  int i, j, k, l;
  int ibegin  = 0;
  for ( j=0; j<nsplit; j++ ) {
    int iend = isplit[j] - 1;
    int isize = iend - ibegin + 1;
    double sigma = L[iend];

    /* Use Gerschgorin disks to find spectral diameter */
    double gl = gersch[2*ibegin    ];
    double gu = gersch[2*ibegin + 1];
    for (i=ibegin+1; i<iend; i++) {
      gl = fmin(gl, gersch[2*i    ]);
      gu = fmax(gu, gersch[2*i + 1]);
    }
    double spdiam = gu - gl;
    double avggap = spdiam / (isize-1);

    /* Find eigenvalues in block */
    int nbl = 0;
    int iWbegin = iend   + 1;
    int iWend   = ibegin - 1;
    for (i=ibegin; i<=iend; i++) {
      if (nbl == 0 && iproc[i] == pid) {
        iWbegin = i;
        iWend   = i;
        nbl++;
        k = i+1;
        while (k <=iend && iproc[k] == pid) {
          iWend++;
          nbl++;
          k++;
        }
        /* iWend = iWbegin + nbl - 1; instead of incrementing in loop */
      }
    }

    /* If no eigenvalues for process in block continue */
    if (nbl == 0) {
      ibegin = iend + 1;
      continue;
    }

    /* Compute DL and DLL for later computation of singletons
     * (freed when all singletons of root are computed) */
    double *DL = (double*)malloc(isize*sizeof(double));
    assert(DL != NULL);
    
    double *DLL = (double*)malloc(isize*sizeof(double));
    assert(DLL != NULL);

    for (i=0; i<isize-1; i++) {
      double tmp = D[i+ibegin]*L[i+ibegin];
      DL[i]      = tmp;
      DLL[i]     = tmp*L[i+ibegin];
    }

    rrr_t *RRR = PMR_create_rrr(&D[ibegin], &L[ibegin], DL, DLL, isize, 0);
    PMR_increment_rrr_dependencies(RRR);
    
    /* In W apply shift of current block to eigenvalues
     * to get unshifted values w.r.t. T */
    for (i=ibegin; i<=iend; i++)
      W[i] += sigma;

    /* Split eigenvalues of block into singletons and clusters
     * and add them to process work queue */
    int max_size = imax(1, nz/nthreads);
    bool task_inserted = false;
    int new_first=ibegin, new_last;
    int sn_first, sn_last, sn_size;
    for (i=ibegin; i<=iend; i++) {
      if (i == iend)
        new_last = i;
      else if (Wgap[i] >= MIN_RELGAP*fabs(Wshifted[i]))
        new_last = i;
      else
        continue;

      /* Skip rest if no eigenvalues of process */
      if (new_first > iWend || new_last < iWbegin) {
        new_first = i + 1;
        continue;
      }

      int new_size = new_last - new_first + 1;
      
      if (new_size == 1) {
        /* Singleton was found */
        if (new_first < iWbegin || new_first > iWend) {
          new_first = i + 1;
          continue;
        } else {
          if (new_first==iWbegin || task_inserted==true) {
            /* Initialize new singleton task */
            sn_first = new_first;
            sn_last  = new_first;
            sn_size  = 1;
          } else {
            /* Extend singleton task by one */
            sn_last++;
            sn_size++;
          }
        }

        /* Insert task if ... */
        if (i==iWend || sn_size>=max_size ||
            Wgap[i+1] < MIN_RELGAP*fabs(Wshifted[i+1])) {

          double lgap;
          if (sn_first == ibegin) {
            lgap = fmax(0.0, W[ibegin] - Werr[ibegin] - gl );
          } else {
            lgap = Wgap[sn_first-1];
          }

          PMR_increment_rrr_dependencies(RRR);

          task_t *task = 
            PMR_create_s_task
            (sn_first, sn_last, 1, ibegin, iend, spdiam, lgap, RRR);
          
          PMR_insert_task_at_back(workQ->s_queue, task);
          
          task_inserted = true;
        } else {
          task_inserted = false;
        }
      } else {
        /* Cluster was found */
        int cl_first = new_first;
        int cl_last  = new_last;
        int cl_size  = new_size;

        /* Split cluster into clusters by absolut criterion */
        if (cl_size > 3) {
          /* Split cluster to smaller clusters [cl_first:cl_last] */
          for (k=new_first+1; k<new_last; k++) {
            if (k == new_last-1)
              cl_last = new_last;
            else if (k != cl_first && Wgap[k] > 0.8*avggap)
              cl_last = k;
            else
              continue;

            /* Skip cluster if no eigenvalues of process in it */
            if (cl_last < iWbegin || cl_first > iWend) {
              cl_first = k + 1;
              continue;
            }

            /* Record left gap of cluster */
            double lgap;
            if (cl_first == ibegin) {
              lgap = fmax(0.0, W[ibegin] - Werr[ibegin] - gl);
            } else {
              lgap = Wgap[cl_first-1];
            }

            /* Determine processes involved in processing the cluster */
            int left_pid  = nproc-1;
            int right_pid = 0;
            for (l=cl_first; l<=cl_last; l++) {
              if (iproc[l] != -1) {
                left_pid  = imin(left_pid,  iproc[l]);
                right_pid = imax(right_pid, iproc[l]);
              }
            }

            rrr_t *RRR_parent = 
              PMR_create_rrr(&D[ibegin], &L[ibegin], NULL, NULL, isize, 0);

            task_t *task = 
              PMR_create_c_task
              (cl_first, cl_last, 1, ibegin, iend, spdiam, lgap, iWbegin, 
               iWend, left_pid, right_pid, RRR_parent);

            /* Insert task into queue, depending if cluster need
             * communication with other processes */
            if (left_pid != right_pid)
              PMR_insert_task_at_back(workQ->r_queue, task);
            else
              PMR_insert_task_at_back(workQ->c_queue, task);
            
            cl_first = k + 1;
          } /* end k */
        } else {
          /* Cluster is too small to split, so insert it to queue */

          /* Record left gap of cluster */
          double lgap;
          if (cl_first == ibegin) {
            lgap = fmax(0.0, W[ibegin] - Werr[ibegin] - gl );
          } else {
            lgap = Wgap[cl_first-1];
          }

          /* Determine processes involved */
          int left_pid  = nproc-1;
          int right_pid = 0;
          for (l=cl_first; l<=cl_last; l++) {
            if (iproc[l] != -1) {
              left_pid  = imin(left_pid,  iproc[l]);
              right_pid = imax(right_pid, iproc[l]);
            }
          }

          rrr_t *RRR_parent = 
            PMR_create_rrr
            (&D[ibegin], &L[ibegin], NULL, NULL, isize, 0);

          task_t *task = 
            PMR_create_c_task
            (cl_first, cl_last, 1, ibegin, 
             iend, spdiam, lgap, iWbegin, iWend,
             left_pid, right_pid, RRR_parent);

          /* Insert task into queue, depending if cluster need
           * communication with other processes */
          if (left_pid != right_pid)
            PMR_insert_task_at_back(workQ->r_queue, task);
          else
            PMR_insert_task_at_back(workQ->c_queue, task);
        }
        task_inserted = true;
      } /* end new_size */

      new_first = i + 1;
    } /* end of splitting eigenvalues into tasks */

    /* Set flag in RRR that last singleton is created */
    PMR_set_parent_processed_flag(RRR);
    PMR_try_destroy_rrr(RRR);

    ibegin = iend + 1;
  } /* end loop over blocks */ 

  return 0;
}
Esempio n. 3
0
static inline 
rrr_t* compute_new_rrr
(cluster_t *cl, int tid, proc_t *procinfo,
 val_t *Wstruct, vec_t *Zstruct,
 tol_t *tolstruct, double *work, int *iwork)
{
  int    cl_begin    = cl->begin;
  int    cl_end      = cl->end;
  int    cl_size     = cl_end - cl_begin + 1;
  int    depth       = cl->depth;
  int    bl_begin    = cl->bl_begin;
  int    bl_end      = cl->bl_end;
  int    bl_size     = bl_end - bl_begin + 1;
  double bl_spdiam   = cl->bl_spdiam;
  rrr_t  *RRR_parent = cl->RRR;

  double *restrict Werr     = Wstruct->Werr;
  double *restrict Wgap     = Wstruct->Wgap;
  int    *restrict Windex   = Wstruct->Windex;
  double *restrict Wshifted = Wstruct->Wshifted;

  /* Allocate memory for new representation for cluster */
  double *D = (double*)malloc(bl_size*sizeof(double)); 
  double *L = (double*)malloc(bl_size*sizeof(double)); 
  double *DL = (double*)malloc(bl_size*sizeof(double)); 
  double *DLL = (double*)malloc(bl_size*sizeof(double)); 
  assert(D!=NULL);
  assert(L!=NULL);
  assert(DL!=NULL);
  assert(DLL!=NULL);

  /* Recompute DL and DLL */
  int i;
  double tmp;
  double *D_parent = RRR_parent->D;
  double *L_parent = RRR_parent->L;
  for (i=0; i<bl_size-1; i++) {
    tmp    = D_parent[i]*L_parent[i];
    DL[i]  = tmp;
    DLL[i] = tmp*L_parent[i];
  }
  double *DL_parent  = DL;
  double *DLL_parent = DLL;

  double RQtol = 2*DBL_EPSILON;
  double pivmin = tolstruct->pivmin;

  /* to shift as close as possible refine extremal eigenvalues */
  int k, p;
  double savegap;
  for (k=0; k<2; k++) {
    if (k == 0) {
      p              = Windex[cl_begin];
      savegap        = Wgap[cl_begin];
      Wgap[cl_begin] = 0.0;
    } else {
      p              = Windex[cl_end  ];
      savegap        = Wgap[cl_end];
      Wgap[cl_end]   = 0.0;
    }
    
    int info;
    int offset  = Windex[cl_begin] - 1;
    odrrb
    (&bl_size, D_parent, DLL_parent, &p, &p, &RQtol,
     &RQtol, &offset, &Wshifted[cl_begin], &Wgap[cl_begin],
     &Werr[cl_begin], work, iwork, &pivmin, &bl_spdiam, &bl_size, &info);
    assert( info == 0 );

    if (k == 0) {
      Wgap[cl_begin] = fmax(0, (Wshifted[cl_begin+1]-Werr[cl_begin+1])
			    - (Wshifted[cl_begin]+Werr[cl_begin]) );
    } else {
      Wgap[cl_end]   = savegap;
    }
  } /* end k */

  double left_gap  = cl->lgap;
  double right_gap = Wgap[cl_end];

  /* Compute new RRR and store it in D and L */
  int info;
  int IONE=1;
  double tau;
  odrrf
  (&bl_size, D_parent, L_parent, DL_parent,
   &IONE, &cl_size, &Wshifted[cl_begin], &Wgap[cl_begin],
   &Werr[cl_begin], &bl_spdiam, &left_gap, &right_gap,
   &pivmin, &tau, D, L, work, &info);
  assert(info == 0);

  /* Update shift and store it */
  tmp = L_parent[bl_size-1] + tau;
  L[bl_size-1] = tmp;

  /* Compute D*L and D*L*L */
  for (i=0; i<bl_size-1; i++) {
    tmp    = D[i]*L[i];
    DL[i]  = tmp;
    DLL[i] = tmp*L[i];
  }

  /* New RRR of cluster is usually created at the parent level and
   * initialized to parent RRR, now reset to contain new RRR */
  if (RRR_parent->copied_parent_rrr == true) {
    free(RRR_parent->D);
    free(RRR_parent->L);
  }
  rrr_t *RRR = PMR_reset_rrr(RRR_parent, D, L, DL, DLL, bl_size, depth+1);
  
  /* Update shifted eigenvalues */
  for (k=cl_begin; k<=cl_end; k++) {
    double fudge = THREE*DBL_EPSILON*fabs(Wshifted[k]);
    Wshifted[k] -= tau;
    fudge += FOUR*DBL_EPSILON*fabs(Wshifted[k]);
    Werr[k] += fudge;
  }

  /* Assure that structure is not freed while it is processed */
  PMR_increment_rrr_dependencies(RRR);

  return RRR;
} /* end compute_new_rrr */
Esempio n. 4
0
static inline void init_workQ(workQ_t *workQ, in_t *Dstruct, 
			      val_t *Wstruct)
{
  double *restrict D      = Dstruct->D;
  double *restrict L      = Dstruct->E;
  int    *restrict isplit = Dstruct->isplit;
  int              m      = Wstruct->m;
  double           *vlp   = Wstruct->vlp;
  double *restrict W      = Wstruct->W;
  double *restrict Werr   = Wstruct->Werr;
  int    *restrict iblock = Wstruct->iblock;

  int              j;
  int              begin ,  end;
  int              Wbegin, Wend;
  int              nbl;
  double           sigma;
  rrr_t            *RRR;
  double           lgap;
  task_t           *task;

  /* For every unreducible block of the matrix create a task
   * and put it in the queue */
  begin  = 0;
  Wbegin = 0;

  for (j=0; j<Dstruct->nsplit; j++) {

    end   = isplit[j] - 1;
    sigma = L[end];

    Wend = Wbegin-1;
    while (Wend < m-1 && iblock[Wend + 1] == j+1) {
	Wend++;
    }

    if (Wend < Wbegin) {
      begin = end + 1; 
      continue;
    }

    nbl = end - begin  + 1;

    RRR = PMR_create_rrr(&D[begin], &L[begin], NULL, NULL, nbl, -1);
  
    if (nbl == 1) {
      /* To make sure that RRR is freed when s-task is processed */
      PMR_increment_rrr_dependencies(RRR);
      PMR_set_parent_processed_flag(RRR);

      task = PMR_create_s_task(Wbegin, Wbegin, 1, begin, end, Wbegin, 
			       Wend, 0, 0, RRR);
      PMR_insert_task_at_back(workQ->s_queue, task);
      
      begin  = end  + 1;
      Wbegin = Wend + 1;
      continue;
    }

    lgap = fmax(0.0, (W[Wbegin]+sigma) - Werr[Wbegin] - (*vlp) );

    task = PMR_create_c_task(Wbegin, Wend, 0, begin, end, Wbegin, 
			     Wend, 0, lgap, RRR);
  
    PMR_insert_task_at_back(workQ->c_queue, task);

    begin  = end  + 1;
    Wbegin = Wend + 1;
  }
  /* end of loop over block */
}