// Driver: invokes two_sum once per sample case. Each case is an ascending
// array, its element count, and a target value. What two_sum does with them
// (print, return, ...) is not visible from this block.
int main()
{
    // Case 1
    int numbers[] = { 2, 7, 11, 15 };
    int n = 4;
    int target = 9;
    two_sum(numbers, target, n);

    // Case 2
    int test1[] = { 3, 4, 9, 10, 11, 24 };
    int n1 = 6;
    int target1 = 15;
    two_sum(test1, target1, n1);

    // Case 3: same data as case 2, different target
    int test2[] = { 3, 4, 9, 10, 11, 24 };
    int n2 = 6;
    int target2 = 21;
    two_sum(test2, target2, n2);
}
// this code is only for minimum score, for maximum score the algorithm is excactly the same except // taking max instead of min in the comparison int main(){ int num; cin >> num; std::vector<int> heaps; for (int i = 0; i < num; ++i) { int t; cin >> t; heaps.push_back(t); } std::vector<int> two_sum(num,0), dp(num-1,0); int cur_min = INT_MAX, cur_idx = 0; for (int i = 0; i < num; ++i) { if (i == 0) { two_sum[i] = heaps[i] + heaps.back(); } else{ two_sum[i] = heaps[i] + heaps[i-1]; } if (cur_min > two_sum[i]) { cur_min = two_sum[i]; cur_idx = i; } } for (int i = 0; i < num-1; ++i) { // skip the final results first, print out the intermediate resutls, and the last result is the final points. dp[i] = cur_min + dp[i-1]; cout << dp[i] << " "; // merge two elements in the original array, round is a helper function that ensure idx is within the bourdary heaps[cur_min] += heaps[round(cur_min-1)]; // update the two_sum result after merging two_sum[round(cur_idx-1)] += heaps[round(cur_idx-2)]; two_sum[round(cur_idx+1)] += heaps[round(cur_idx-1)]; // delete the element that is merged heaps.erase(heaps.begin() + round(cur_idx-1)); two_sum.erase(two_sum.begin + cur_idx); // find the next merge target cur_min = INT_MAX; for(int j = 0 ; j < two_sum.size(); j++){ cout << heaps[j] << " "; if (two_sum[j] > cur_min) { cur_min = two_sum[j]; cur_idx = j; } } cout << endl; } }
//============================================================================== // a and sum may be aliased to the same expansion for in-place addition void add(const expansion& a, double b, expansion& sum) { unsigned int m=a.size(); sum.reserve(m+1); double s; for(unsigned int i=0; i<m; ++i){ two_sum(b, a[i], b, s); if(s) sum.push_back(s); } sum.push_back(b); }
//============================================================================== // aliasing a, b and sum is safe void add(const expansion& a, const expansion& b, expansion& sum) { /* slow but obvious way of doing it add(a, b[0], sum); for(unsigned int i=1; i<b.size(); ++i) add(sum, b[i], sum); // aliasing sum is safe */ // Shewchuk's fast-expansion-sum if(a.empty()){ sum=b; return; }else if(b.empty()){ sum=a; return; } expansion merge(a.size()+b.size(), 0); unsigned int i=0, j=0, k=0; for(;;){ if(std::fabs(a[i])<std::fabs(b[j])){ merge[k++]=a[i++]; if(i==a.size()){ while(j<b.size()) merge[k++]=b[j++]; break; } }else{ merge[k++]=b[j++]; if(j==b.size()){ while(i<a.size()) merge[k++]=a[i++]; break; } } } sum.reserve(merge.size()); sum.resize(0); double q, r; fast_two_sum(merge[1], merge[0], q, r); if(r) sum.push_back(r); for(i=2; i<merge.size(); ++i){ two_sum(q, merge[i], q, r); if(r) sum.push_back(r); } if(q) sum.push_back(q); }
//============================================================================== void multiply(const expansion& a, double b, expansion& product) { // basic idea: // multiply each entry in a by b (producing two new entries), then // two_sum them in such a way to guarantee increasing/non-overlapping output product.resize(2*a.size()); if(a.empty()) return; two_product(a[0], b, product[1], product[0]); // finalize product[0] double x, y, z; for(unsigned int i=1; i<a.size(); ++i){ two_product(a[i], b, x, y); // finalize product[2*i-1] two_sum(product[2*i-1], y, z, product[2*i-1]); // finalize product[2*i], could be fast_two_sum instead fast_two_sum(x, z, product[2*i+1], product[2*i]); } // multiplication is a prime candidate for producing spurious zeros, so // remove them by default remove_zeros(product); }
//============================================================================== void add(double a, double b, expansion& sum) { sum.resize(2); two_sum(a, b, sum[1], sum[0]); }
// Collects triplets from num that sum to zero.
//
// num is sorted in place (ascending) as a side effect. After sorting, the
// values are split at nzbegin, the index of the first non-negative element,
// and three kinds of triplets are gathered in this order:
//   1. one negative value plus two non-negative values,
//   2. two negative values plus one non-negative value,
//   3. {0, 0, 0} when at least three zeros are present.
// The pair search for kinds 1 and 2 is delegated to the two_sum helper, which
// appends its matches to ret; its exact contract (including the meaning of the
// trailing bool) is defined outside this block.
//
// NOTE(review): -num[i] overflows if num contains INT_MIN; not handled here.
vector<vector<int> > threeSum(vector<int> &num) {
    vector<vector<int> > ret;

    if (num.size() < 3) { // fewer than three values: no triplet possible
        return ret;
    }

    // sort the array
    sort(num.begin(), num.end());

    const int n = static_cast<int>(num.size());

    // No negative values at all: the only possible triplet is three zeros.
    if (num[0] > 0) {
        return ret;
    }
    if (num[0] == 0) {
        if (num[1] == 0 && num[2] == 0) {
            ret.push_back({0, 0, 0});
        }
        return ret;
    }

    // From here on num[0] < 0. Locate the first non-negative element.
    int nzbegin = -1; // non-negative element index beginning
    for (int i = 1; i < n; i++) {
        if (num[i] >= 0) {
            nzbegin = i;
            break;
        }
    }
    if (nzbegin == -1) { // everything is negative
        return ret;
    }

    // The largest value is 0, so there are no positives to balance the
    // negatives. The only triplet that can still exist is three zeros, which
    // must not be dropped just because negatives are present too.
    if (num[n - 1] == 0) {
        if (n - nzbegin >= 3) {
            ret.push_back({0, 0, 0});
        }
        return ret;
    }

    // one negative number, two non-negative numbers
    if (nzbegin < n - 1) {
        for (int i = 0; i < nzbegin; i++) {
            // Skip repeated values by comparing with the previous element;
            // unlike a sentinel, this cannot collide with a real input value.
            if (i > 0 && num[i] == num[i - 1]) {
                continue;
            }
            int match = -num[i];
            two_sum(ret, num, num[i], match, nzbegin, num.size(), true);
        }
    }

    // two negative numbers, one non-negative number
    if (nzbegin > 1) {
        for (int i = nzbegin; i < n; i++) {
            if (i > nzbegin && num[i] == num[i - 1]) {
                continue;
            }
            int match = -num[i];
            two_sum(ret, num, num[i], match, 0, nzbegin, false);
        }
    }

    // three zeros alongside negative numbers: since the array is sorted,
    // num[nzbegin + 2] == 0 implies the two elements before it are zero too
    if (nzbegin + 2 < n && num[nzbegin + 2] == 0) {
        ret.push_back({0, 0, 0});
    }

    return ret;
}
// Runs the clSPARSE CSR matrix-vector product on the device and checks it
// against a reference computed on the host in this function.
//
// Steps, for whichever of the two branches matches T (selected at run time by
// comparing typeid(T) with cl_float / cl_double):
//   1. Call clsparseScsrmv / clsparseDcsrmv with gAlpha, the CSR matrix, gX,
//      gBeta and gY.
//   2. Recompute the reference into hY on the host, row by row:
//      hY[row] = hBeta*hY[row] + sum_i hAlpha*vals[i]*hX[cols[i]], with i
//      running over rows[row] .. rows[row+1]-1.
//   3. Map gY.values for reading (blocking map), gather ULP-distance
//      statistics, and compare every element with ASSERT_NEAR using a
//      tolerance that depends on extended_precision.
//   4. Unmap the buffer.
// Afterwards gY.values is released and recreated from the contents of hY.
//
// NOTE(review): the ASSERT_* macros are presumably GoogleTest fatal assertions,
// which return from this function on failure; in that case the mapped buffer
// would not be unmapped and gY would not be reset -- confirm whether that
// matters for subsequent tests.
void test_csrmv()
{
    clsparseStatus status;
    cl_int cl_status;

    // Apply the fixture's extended_precision setting to the control object
    // before launching the kernel.
    clsparseEnableExtendedPrecision(CLSE::control, extended_precision);

    // ---------------------------------------------------------------- float
    if (typeid(T) == typeid(cl_float) )
    {
        status = clsparseScsrmv(&gAlpha, &CSRE::csrSMatrix, &gX, &gBeta, &gY, CLSE::control);

        ASSERT_EQ(clsparseSuccess, status);

        // Raw views of the host-side matrix: values, row offsets (index1) and
        // column indices (index2).
        float* vals = (float*)&CSRE::ublasSCsr.value_data()[0];
        int* rows = &CSRE::ublasSCsr.index1_data()[0];
        int* cols = &CSRE::ublasSCsr.index2_data()[0];
        for (int row = 0; row < CSRE::n_rows; row++)
        {
            // Summation done at a higher precision to decrease
            // summation errors from rounding.
            hY[row] *= hBeta;
            // NOTE(review): row_end is never read; the loop below re-reads
            // rows[row+1] directly.
            int row_end = rows[row+1];
            double temp_sum;
            temp_sum = hY[row];
            for (int i = rows[row]; i < rows[row+1]; i++)
            {
                // Perform: hY[row] += hAlpha * vals[i] * hX[cols[i]];
                temp_sum += hAlpha * vals[i] * hX[cols[i]];
            }
            // The double accumulator is stored back into hY.
            hY[row] = temp_sum;
        }

        // Blocking (CL_TRUE) read-only map of the device result.
        T* host_result = (T*) ::clEnqueueMapBuffer(CLSE::queue, gY.values, CL_TRUE, CL_MAP_READ, 0, gY.num_values * sizeof(T), 0, nullptr, nullptr, &cl_status);
        ASSERT_EQ(CL_SUCCESS, cl_status);

        // ULP-distance statistics between reference and device result. These
        // are only printed (below); they do not decide pass/fail.
        uint64_t max_ulps = 0;
        uint64_t min_ulps = UINT64_MAX;
        uint64_t total_ulps = 0;
        for (int i = 0; i < hY.size(); i++)
        {
            long long int intDiff = (long long int)boost::math::float_distance(hY[i], host_result[i]);
            intDiff = llabs(intDiff);
            total_ulps += intDiff;
            if (max_ulps < intDiff)
                max_ulps = intDiff;
            if (min_ulps > intDiff)
                min_ulps = intDiff;
            // Debug printouts.
            //std::cout << "Row " << i << " Float Ulps: " << intDiff << std::endl;
            //std::cout.precision(9);
            //std::cout << "\tFloat hY[" << i << "] = " << std::scientific << hY[i] << " (0x" << std::hex << *(uint32_t *)&hY[i] << "), " << std::dec;
            //std::cout << "host_result[" << i << "] = " << std::scientific << host_result[i] << " (0x" << std::hex << *(uint32_t *)&host_result[i] << ")" << std::dec << std::endl;
        }
#ifndef NDEBUG
        // Statistics are reported only in non-NDEBUG builds and only when
        // extended precision is enabled.
        if (extended_precision)
        {
            std::cout << "Float Min ulps: " << min_ulps << std::endl;
            std::cout << "Float Max ulps: " << max_ulps << std::endl;
            std::cout << "Float Total ulps: " << total_ulps << std::endl;
            std::cout << "Float Average ulps: " << (double)total_ulps/(double)hY.size() << " (Size: " << hY.size() << ")" << std::endl;
        }
#endif

        // Pass/fail check. Tolerance is relative to the reference value
        // (1e-3 with extended precision, 0.1 without) when the reference is a
        // normal number, and never smaller than 10*FLT_EPSILON.
        for (int i = 0; i < hY.size(); i++)
        {
            double compare_val = 0.;
            if (extended_precision)
            {
                // The limit here is somewhat weak because some GPUs don't
                // support correctly rounded denorms in SPFP mode.
                if (boost::math::isnormal(hY[i]))
                    compare_val = fabs(hY[i]*1e-3);
            }
            else
            {
                if (boost::math::isnormal(hY[i]))
                    compare_val = fabs(hY[i]*0.1);
            }
            if (compare_val < 10*FLT_EPSILON)
                compare_val = 10*FLT_EPSILON;
            ASSERT_NEAR(hY[i], host_result[i], compare_val);
        }

        cl_status = ::clEnqueueUnmapMemObject(CLSE::queue, gY.values, host_result, 0, nullptr, nullptr);
        ASSERT_EQ(CL_SUCCESS, cl_status);
    }

    // --------------------------------------------------------------- double
    if (typeid(T) == typeid(cl_double) )
    {
        status = clsparseDcsrmv(&gAlpha, &CSRE::csrDMatrix, &gX, &gBeta, &gY, CLSE::control);

        ASSERT_EQ(clsparseSuccess, status);

        // Raw views of the host-side matrix: values, row offsets (index1) and
        // column indices (index2).
        double* vals = (double*)&CSRE::ublasDCsr.value_data()[0];
        int* rows = &CSRE::ublasDCsr.index1_data()[0];
        int* cols = &CSRE::ublasDCsr.index2_data()[0];
        for (int row = 0; row < CSRE::n_rows; row++)
        {
            // Summation done using a compensated summation to decrease
            // summation errors from rounding. This allows us to get
            // smaller errors without requiring quad precision support.
            // This method is like performing summation at quad precision and
            // casting down to double in the end.
            hY[row] *= hBeta;
            // NOTE(review): row_end is never read; the loop below re-reads
            // rows[row+1] directly.
            int row_end = rows[row+1];
            double temp_sum;
            temp_sum = hY[row];
            // Error term handed to two_sum by pointer on every step and added
            // back once after the loop. (two_sum here is the three-argument
            // helper declared outside this function.)
            T sumk_err = 0.;
            for (int i = rows[row]; i < rows[row+1]; i++)
            {
                // Perform: hY[row] += hAlpha * vals[i] * hX[cols[i]];
                temp_sum = two_sum(temp_sum, hAlpha*vals[i]*hX[cols[i]], &sumk_err);
            }
            hY[row] = temp_sum + sumk_err;
        }

        // Blocking (CL_TRUE) read-only map of the device result.
        T* host_result = (T*) ::clEnqueueMapBuffer(CLSE::queue, gY.values, CL_TRUE, CL_MAP_READ, 0, gY.num_values * sizeof(T), 0, nullptr, nullptr, &cl_status);
        ASSERT_EQ(CL_SUCCESS, cl_status);

        // ULP-distance statistics between reference and device result. These
        // are only printed (below); they do not decide pass/fail.
        uint64_t max_ulps = 0;
        uint64_t min_ulps = ULLONG_MAX;
        uint64_t total_ulps = 0;
        for (int i = 0; i < hY.size(); i++)
        {
            long long int intDiff = (long long int)boost::math::float_distance(hY[i], host_result[i]);
            intDiff = llabs(intDiff);
            total_ulps += intDiff;
            if (max_ulps < intDiff)
                max_ulps = intDiff;
            if (min_ulps > intDiff)
                min_ulps = intDiff;
            // Debug printouts.
            //std::cout << "Row " << i << " Double Ulps: " << intDiff << std::endl;
            //std::cout.precision(17);
            //std::cout << "\tDouble hY[" << i << "] = " << std::scientific << hY[i] << " (0x" << std::hex << *(uint64_t *)&hY[i] << "), " << std::dec;
            //std::cout << "host_result[" << i << "] = " << std::scientific << host_result[i] << " (0x" << std::hex << *(uint64_t *)&host_result[i] << ")" << std::dec << std::endl;
        }

        if (extended_precision)
        {
#ifndef NDEBUG
            // Statistics are reported only in non-NDEBUG builds.
            std::cout << "Double Min ulps: " << min_ulps << std::endl;
            std::cout << "Double Max ulps: " << max_ulps << std::endl;
            std::cout << "Double Total ulps: " << total_ulps << std::endl;
            std::cout << "Double Average ulps: " << (double)total_ulps/(double)hY.size() << " (Size: " << hY.size() << ")" << std::endl;
#endif

            // Extended precision: relative tolerance of 1e-14, never smaller
            // than 10*DBL_EPSILON. Unlike the other comparison loops, this one
            // does not test isnormal() first.
            for (int i = 0; i < hY.size(); i++)
            {
                double compare_val = fabs(hY[i]*1e-14);
                if (compare_val < 10*DBL_EPSILON)
                    compare_val = 10*DBL_EPSILON;
                ASSERT_NEAR(hY[i], host_result[i], compare_val);
            }
        }
        else
        {
            // Regular precision: relative tolerance of 0.1 for normal
            // reference values, never smaller than 10*DBL_EPSILON.
            for (int i = 0; i < hY.size(); i++)
            {
                double compare_val = 0.;
                if (boost::math::isnormal(hY[i]))
                    compare_val = fabs(hY[i]*0.1);
                if (compare_val < 10*DBL_EPSILON)
                    compare_val = 10*DBL_EPSILON;
                ASSERT_NEAR(hY[i], host_result[i], compare_val);
            }
        }

        cl_status = ::clEnqueueUnmapMemObject(CLSE::queue, gY.values, host_result, 0, nullptr, nullptr);
        ASSERT_EQ(CL_SUCCESS, cl_status);
    }

    // Reset output buffer for next test.
    // The old device buffer is released and a new one is created whose
    // initial contents are copied from hY (CL_MEM_COPY_HOST_PTR), i.e. from
    // the host reference result computed above when one of the branches ran.
    ::clReleaseMemObject(gY.values);
    clsparseInitVector(&gY);

    gY.values = clCreateBuffer(CLSE::context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, hY.size() * sizeof(T), hY.data().begin(), &cl_status);
    gY.num_values = hY.size();
    ASSERT_EQ(CL_SUCCESS, cl_status);
}