/**
 * Write one interval -- its limits and the two associated counts -- to slot
 * `addr` of the given arrays.
 *
 * The counts are always stored as passed in. The limits are stored as passed
 * in unless the interval is considered converged, in which case both limits
 * are replaced by computeMidpoint(left, right).
 *
 * An interval counts as converged when
 *   |right - left| <= max(MIN_ABS_INTERVAL, max(|left|, |right|) * precision).
 *
 * NOTE(review): NumericT, T and S are not declared in the visible code; they
 * are presumably template parameters declared outside this view -- confirm.
 *
 * @param addr           slot index written in all four arrays
 * @param s_left         receives the left limit (or the midpoint if converged)
 * @param s_right        receives the right limit (or the midpoint if converged)
 * @param s_left_count   receives left_count
 * @param s_right_count  receives right_count
 * @param left           left limit of the interval
 * @param right          right limit of the interval
 * @param left_count     count associated with the left limit
 * @param right_count    count associated with the right limit
 * @param precision      relative tolerance used in the convergence test
 */
__device__ void storeInterval(unsigned int addr,
                              NumericT *s_left, NumericT *s_right,
                              T *s_left_count, T *s_right_count,
                              NumericT left, NumericT right,
                              S left_count, S right_count,
                              NumericT precision)
{
    // The counts do not depend on the convergence state.
    s_left_count[addr]  = left_count;
    s_right_count[addr] = right_count;

    // Width of the interval and the tolerance it is compared against: the
    // relative tolerance, bounded from below by the absolute minimum width.
    const NumericT width     = abs(right - left);
    const NumericT rel_tol   = max(abs(left), abs(right)) * precision;
    const NumericT tolerance = max(static_cast<NumericT>(MIN_ABS_INTERVAL), rel_tol);

    // Limits that end up in the arrays; by default the incoming ones.
    NumericT out_left  = left;
    NumericT out_right = right;

    if (width <= tolerance)
    {
        // Converged: collapse the interval onto its midpoint so that both
        // limits carry the same value.
        out_right = computeMidpoint(left, right);
        out_left  = out_right;
    }

    s_left[addr]  = out_left;
    s_right[addr] = out_right;
}
/**
 * Bisection refinement of intervals, one interval per thread.
 *
 * Thread `gtid` loads (g_left[gtid], g_right[gtid], g_pos[gtid]) and then
 * repeatedly halves its interval: the midpoint is computed, the number of
 * eigenvalues below it is obtained from computeNumSmallerEigenvalsLarge(),
 * and the half selected by comparing that number with g_pos[gtid] is kept.
 * Once the interval is converged both limits are set to its midpoint, and
 * that value is written to g_left[gtid]. g_right is only read.
 *
 * All threads of a block iterate together until every active thread of the
 * block has converged, because the loop contains block-wide barriers.
 *
 * Termination: besides the tolerance test `width < min(precision, rel_tol)`
 * an interval is also treated as converged when its midpoint coincides with
 * one of its limits. In that situation further bisection cannot change the
 * interval any more, so without this check the loop would never end whenever
 * the tolerance is unreachable (interval collapsing onto 0, precision <= 0,
 * or precision below the floating-point resolution).
 *
 * Launch/layout notes:
 *  - indexing is one-dimensional (blockDim.x * blockIdx.x + threadIdx.x);
 *  - two __shared__ scratch arrays of VIENNACL_BISECT_MAX_THREADS_BLOCK
 *    elements are handed to computeNumSmallerEigenvalsLarge();
 *    NOTE(review): presumably blockDim.x must not exceed that constant --
 *    confirm against the helper and the launch site.
 *  - NOTE(review): NumericT is not declared in the visible code; presumably
 *    a template parameter declared outside this view -- confirm.
 *
 * @param g_d            forwarded to computeNumSmallerEigenvalsLarge()
 *                       (presumably the matrix diagonal -- confirm)
 * @param g_s            forwarded to computeNumSmallerEigenvalsLarge()
 *                       (presumably the off-diagonal -- confirm)
 * @param n              forwarded to computeNumSmallerEigenvalsLarge()
 *                       (presumably the matrix size -- confirm)
 * @param num_intervals  number of intervals; threads with
 *                       gtid >= num_intervals only take part in the barriers
 *                       and the helper call
 * @param g_left         in: left interval limits, out: converged values
 * @param g_right        in: right interval limits (not written)
 * @param g_pos          per interval, the eigenvalue count associated with
 *                       its right limit
 * @param precision      tolerance used in the convergence test
 */
__global__ void bisectKernelLarge_OneIntervals(const NumericT *g_d, const NumericT *g_s, const unsigned int n,
                                               unsigned int num_intervals,
                                               NumericT *g_left, NumericT *g_right,
                                               unsigned int *g_pos,
                                               NumericT precision)
{
    const unsigned int gtid = (blockDim.x * blockIdx.x) + threadIdx.x;

    __shared__ NumericT s_left_scratch[VIENNACL_BISECT_MAX_THREADS_BLOCK];
    __shared__ NumericT s_right_scratch[VIENNACL_BISECT_MAX_THREADS_BLOCK];

    // flag to determine if all threads of the block converged
    __shared__ unsigned int converged_all_threads;

    // true for threads that own an interval
    const bool has_interval = (gtid < num_intervals);

    // Active interval of the thread. Zero-initialised so that threads without
    // an interval never hold indeterminate values.
    NumericT left  = static_cast<NumericT>(0);
    NumericT right = static_cast<NumericT>(0);

    // number of eigenvalues smaller than the right limit, as read from g_pos
    unsigned int right_count = 0;

    // flag if current thread converged
    unsigned int converged = 0;

    // midpoint when current interval is subdivided
    NumericT mid = 0.0f;

    // number of eigenvalues less than mid
    unsigned int mid_count = 0;

    // read data from global memory
    if (has_interval)
    {
        left        = g_left[gtid];
        right       = g_right[gtid];
        right_count = g_pos[gtid];
    }

    // initialize shared flag
    if (0 == threadIdx.x)
    {
        converged_all_threads = 0;
    }

    __syncthreads();

    // process until all threads converged to an eigenvalue
    while (true)
    {
        // Optimistically assume convergence; any thread that is still active
        // after this iteration clears the flag again below.
        converged_all_threads = 1;

        // update midpoint for all active threads
        if (has_interval && (0 == converged))
        {
            mid = computeMidpoint(left, right);
        }

        // find number of eigenvalues that are smaller than midpoint
        // (called by every thread of the block, active or not)
        mid_count = computeNumSmallerEigenvalsLarge(g_d, g_s, n,
                                                    mid, gtid, num_intervals,
                                                    s_left_scratch,
                                                    s_right_scratch,
                                                    converged);

        __syncthreads();

        // for all active threads
        if (has_interval && (0 == converged))
        {
            // If the midpoint equals one of the limits, bisection can no
            // longer shrink the interval: every further iteration would
            // reproduce exactly the same state. Evaluated before the limits
            // are updated.
            const bool resolution_exhausted = (mid == left) || (mid == right);

            // update intervals -- always one child interval survives
            if (right_count == mid_count)
            {
                right = mid;
            }
            else
            {
                left = mid;
            }

            // check for convergence
            const NumericT t0 = right - left;
            const NumericT t1 = max(abs(right), abs(left)) * precision;

            if (resolution_exhausted || (t0 < min(precision, t1)))
            {
                const NumericT lambda = computeMidpoint(left, right);
                left  = lambda;
                right = lambda;

                converged = 1;
            }
            else
            {
                converged_all_threads = 0;
            }
        }

        // make the flag updates of all threads visible before it is read
        __syncthreads();

        if (1 == converged_all_threads)
        {
            break;
        }

        // Every thread must have read the flag before any thread resets it to
        // 1 at the top of the next iteration; otherwise a slow thread could
        // leave the loop while the rest of the block continues.
        __syncthreads();
    }

    // write data back to global memory
    __syncthreads();

    if (has_interval)
    {
        // intervals converged so left and right interval limit are both
        // identical and identical to the eigenvalue
        g_left[gtid] = left;
    }
}