コード例 #1
0
__device__
void
storeInterval(unsigned int addr,
              NumericT *s_left, NumericT *s_right,
              T *s_left_count, T *s_right_count,
              NumericT left, NumericT right,
              S left_count, S right_count,
              NumericT precision)
{
    // Write one interval (limits and eigenvalue counts) into slot `addr` of
    // the four parallel arrays. If the interval is already narrow enough, both
    // limits are replaced by the value returned by computeMidpoint(), which
    // marks the slot as converged (left == right).
    //
    //   addr                        slot index written in all four arrays
    //   s_left, s_right             destination arrays for the interval limits
    //   s_left_count, s_right_count destination arrays for the counts
    //   left, right                 limits of the interval to store
    //   left_count, right_count     counts belonging to left / right
    //   precision                   relative tolerance of the convergence test
    //
    // NOTE(review): the s_ prefix suggests the arrays live in shared memory;
    // nothing in this function depends on that -- confirm against the callers.

    // The counts are stored unconditionally and unmodified.
    s_left_count[addr] = left_count;
    s_right_count[addr] = right_count;

    // Interval width, and the relative tolerance scaled by the larger
    // magnitude of the two limits.
    const NumericT width = abs(right - left);
    const NumericT rel_tol = max(abs(left), abs(right)) * precision;

    // Converged when the width does not exceed the larger of the absolute
    // floor MIN_ABS_INTERVAL and the relative tolerance.
    const bool has_converged =
        (width <= max(static_cast<NumericT>(MIN_ABS_INTERVAL), rel_tol));

    // Default: keep the current limits.
    NumericT stored_left = left;
    NumericT stored_right = right;

    if (has_converged)
    {
        // Collapse the interval onto a single point.
        const NumericT lambda = computeMidpoint(left, right);
        stored_left = lambda;
        stored_right = lambda;
    }

    s_left[addr] = stored_left;
    s_right[addr] = stored_right;
}
コード例 #2
0
__global__
void
bisectKernelLarge_OneIntervals(const NumericT *g_d, const NumericT *g_s, const unsigned int n,
                               unsigned int num_intervals,
                               NumericT *g_left, NumericT *g_right,
                               unsigned int *g_pos,
                               NumericT  precision)
{
  // Refine, by repeated bisection, intervals that each contain exactly one
  // eigenvalue. One thread handles one interval; when the whole block has
  // converged, each thread writes its collapsed limit to g_left[gtid].
  //
  //   g_d, g_s, n    forwarded unchanged to computeNumSmallerEigenvalsLarge()
  //                  (presumably the tridiagonal matrix data and its size --
  //                  confirm against that helper)
  //   num_intervals  number of intervals; threads with gtid >= num_intervals
  //                  own no interval but still take part in every barrier
  //   g_left         in: left limits,  out: converged value per interval
  //   g_right        in: right limits (read only by this kernel)
  //   g_pos          in: count associated with the right limit of each interval
  //   precision      tolerance used by the convergence test
  //
  // Launch layout: indexing uses only the x dimension of grid and block.
  // Static shared memory: two NumericT arrays of
  // VIENNACL_BISECT_MAX_THREADS_BLOCK elements plus one unsigned int flag.

  // Scratch space handed to computeNumSmallerEigenvalsLarge().
  __shared__  NumericT  s_left_scratch[VIENNACL_BISECT_MAX_THREADS_BLOCK];
  __shared__  NumericT  s_right_scratch[VIENNACL_BISECT_MAX_THREADS_BLOCK];

  // Block-wide flag: stays 1 only if no thread of the block is still active.
  __shared__  unsigned int  converged_all_threads;

  const unsigned int gtid = (blockDim.x * blockIdx.x) + threadIdx.x;

  // True for threads that actually own an interval.
  const bool owns_interval = (gtid < num_intervals);

  // Current limits of this thread's interval.
  NumericT left, right;
  // Number of eigenvalues smaller than the right limit (also the global index
  // of the eigenvalue contained in the interval).
  unsigned int right_count;
  // Set to 1 once this thread's interval has collapsed.
  unsigned int converged = 0;
  // Bisection point; kept across iterations because converged and idle
  // threads still pass their last value to the counting helper.
  NumericT mid = 0.0f;

  // Load this thread's interval from global memory.
  if (owns_interval)
  {
    left = g_left[gtid];
    right = g_right[gtid];
    right_count = g_pos[gtid];
  }

  // Initialise the shared flag before the first iteration.
  if (0 == threadIdx.x)
  {
    converged_all_threads = 0;
  }

  __syncthreads();

  // Iterate until every thread of the block has converged.
  for (;;)
  {
    // Every thread optimistically raises the flag; any thread that is still
    // active after this round lowers it again below.
    converged_all_threads = 1;

    // Only active threads move their bisection point.
    if (owns_interval && (0 == converged))
    {
      mid = computeMidpoint(left, right);
    }

    // Called by ALL threads of the block (it is handed the shared scratch
    // arrays): number of eigenvalues smaller than mid.
    const unsigned int mid_count =
        computeNumSmallerEigenvalsLarge(g_d, g_s, n,
                                        mid, gtid, num_intervals,
                                        s_left_scratch,
                                        s_right_scratch,
                                        converged);

    __syncthreads();

    if (owns_interval && (0 == converged))
    {
      // Exactly one half survives: equal counts mean the eigenvalue lies
      // below mid, so the right limit moves; otherwise the left limit moves.
      if (right_count != mid_count)
      {
        left = mid;
      }
      else
      {
        right = mid;
      }

      // Convergence test: the width must be below both the absolute and the
      // magnitude-scaled tolerance.
      const NumericT width = right - left;
      const NumericT scaled_tol = max(abs(right), abs(left)) * precision;

      if (width < min(precision, scaled_tol))
      {
        // Collapse the interval onto a single value.
        const NumericT lambda = computeMidpoint(left, right);
        left = lambda;
        right = lambda;

        converged = 1;
      }
      else
      {
        // Still active: veto the block-wide exit.
        converged_all_threads = 0;
      }
    }

    // Make all vetoes visible before anyone reads the flag.
    __syncthreads();

    if (1 == converged_all_threads)
    {
      break;
    }

    // Keep fast threads from re-raising the flag before slow ones read it.
    __syncthreads();
  }

  // Write the results back to global memory.
  __syncthreads();

  if (owns_interval)
  {
      // After convergence left == right, so one store per interval suffices.
      g_left[gtid] = left;
  }
}