Пример #1
0
// Driver: feeds three sample arrays to two_sum, each together with its target
// value and its element count.
int main()
{
    int numbers[] = { 2, 7, 11, 15 };
    int test1[] = { 3, 4, 9, 10, 11, 24 };
    int test2[] = { 3, 4, 9, 10, 11, 24 };

    // Element counts are derived from the arrays themselves (4, 6 and 6).
    int numbers_len = sizeof(numbers) / sizeof(numbers[0]);
    int test1_len = sizeof(test1) / sizeof(test1[0]);
    int test2_len = sizeof(test2) / sizeof(test2[0]);

    int numbers_target = 9;
    int test1_target = 15;
    int test2_target = 21;

    two_sum(numbers, numbers_target, numbers_len);
    two_sum(test1, test1_target, test1_len);
    two_sum(test2, test2_target, test2_len);
}
Пример #2
0
// Greedy simulation of merging heaps that are arranged in a circle.
// In every round the adjacent pair with the smallest combined size is merged
// and that combined size is added to the running score.
// This code is only for the minimum score; for the maximum score the algorithm
// is exactly the same except for taking max instead of min in the comparison.
// NOTE(review): a greedy pair choice is not guaranteed to give the optimal
// total for every input - confirm against the task statement.
//
// Input : the number of heaps, followed by that many heap sizes.
// Output: one line per merge - the accumulated score, then the heap sizes that
//         remain after the merge. The score on the last line is the final one.
int main(){

	int num;
	// Fewer than two heaps means there is nothing to merge (this also keeps
	// the num-1 sized score vector from getting a negative size).
	if (!(std::cin >> num) || num < 2)
	{
		return 0;
	}

	std::vector<int> heaps;
	heaps.reserve(num);
	for (int i = 0; i < num; ++i)
	{
		int t;
		std::cin >> t;
		heaps.push_back(t);
	}

	// dp[i] is the accumulated score after i+1 merges.
	std::vector<int> dp(num-1, 0);
	for (int i = 0; i < num-1; ++i)
	{
		const int size = static_cast<int>(heaps.size());

		// Find the merge target: the index cur_idx whose heap, together with
		// its circular predecessor, has the smallest combined size. The first
		// such pair wins on ties.
		int cur_min = INT_MAX, cur_idx = 0;
		for (int j = 0; j < size; ++j)
		{
			const int prev = (j + size - 1) % size; // wraps 0 -> size-1
			const int pair_sum = heaps[j] + heaps[prev];
			if (cur_min > pair_sum)
			{
				cur_min = pair_sum;
				cur_idx = j;
			}
		}

		// Accumulate the score; the first round has no previous total.
		dp[i] = cur_min + (i > 0 ? dp[i-1] : 0);
		std::cout << dp[i] << " ";

		// Merge the predecessor into heaps[cur_idx], then drop the predecessor.
		const int merged_away = (cur_idx + size - 1) % size;
		heaps[cur_idx] += heaps[merged_away];
		heaps.erase(heaps.begin() + merged_away);

		// Print the heaps that remain after this merge.
		for (int j = 0; j < static_cast<int>(heaps.size()); ++j)
		{
			std::cout << heaps[j] << " ";
		}
		std::cout << std::endl;
	}
}
Пример #3
0
//==============================================================================
// a and sum may be aliased to the same expansion for in-place addition
void
add(const expansion& a, double b, expansion& sum)
{
   unsigned int m=a.size();
   sum.reserve(m+1);
   double s;
   for(unsigned int i=0; i<m; ++i){
      two_sum(b, a[i], b, s);
      if(s) sum.push_back(s);
   }
   sum.push_back(b);
}
Пример #4
0
//==============================================================================
// aliasing a, b and sum is safe
void
add(const expansion& a, const expansion& b, expansion& sum)
{
   /* slow but obvious way of doing it
   add(a, b[0], sum);
   for(unsigned int i=1; i<b.size(); ++i)
      add(sum, b[i], sum); // aliasing sum is safe
   */
   // Shewchuk's fast-expansion-sum
   if(a.empty()){
      sum=b;
      return;
   }else if(b.empty()){
      sum=a;
      return;
   }
   expansion merge(a.size()+b.size(), 0);
   unsigned int i=0, j=0, k=0;
   for(;;){
      if(std::fabs(a[i])<std::fabs(b[j])){
         merge[k++]=a[i++];
         if(i==a.size()){
            while(j<b.size()) merge[k++]=b[j++];
            break;
         }
      }else{
         merge[k++]=b[j++];
         if(j==b.size()){
            while(i<a.size()) merge[k++]=a[i++];
            break;
         }
      }
   }
   sum.reserve(merge.size());
   sum.resize(0);
   double q, r;
   fast_two_sum(merge[1], merge[0], q, r);
   if(r) sum.push_back(r);
   for(i=2; i<merge.size(); ++i){
      two_sum(q, merge[i], q, r);
      if(r) sum.push_back(r);
   }
   if(q) sum.push_back(q);
}
Пример #5
0
//==============================================================================
void
multiply(const expansion& a, double b, expansion& product)
{
   // basic idea:
   // multiply each entry in a by b (producing two new entries), then
   // two_sum them in such a way to guarantee increasing/non-overlapping output
   product.resize(2*a.size());
   if(a.empty()) return;
   two_product(a[0], b, product[1], product[0]); // finalize product[0]
   double x, y, z;
   for(unsigned int i=1; i<a.size(); ++i){
      two_product(a[i], b, x, y);
      // finalize product[2*i-1]
      two_sum(product[2*i-1], y, z, product[2*i-1]);
      // finalize product[2*i], could be fast_two_sum instead
      fast_two_sum(x, z, product[2*i+1], product[2*i]);
   }
   // multiplication is a prime candidate for producing spurious zeros, so
   // remove them by default
   remove_zeros(product);
}
Пример #6
0
//==============================================================================
// Adds two doubles and stores the result in sum as a two-entry expansion:
// two_sum writes its first output to sum[1] and its second output to sum[0].
void
add(double a, double b, expansion& sum)
{
   sum.resize(2);
   double& second_entry=sum[1];
   double& first_entry=sum[0];
   two_sum(a, b, second_entry, first_entry);
}
Пример #7
0
 // Collects triplets of elements of num that sum to zero.
 // num is sorted in place. The search is split by sign:
 //   - one negative value plus two non-negative values,
 //   - two negative values plus one non-negative value,
 //   - three zeros.
 // The pair search itself is delegated to the two_sum helper, which appends
 // its findings to the result.
 // Returns an empty vector when num has fewer than three elements.
 vector<vector<int> > threeSum(vector<int> &num) {
     vector<vector<int> > ret;

     if (num.size() < 3) {  // error checking
         return ret;
     }

     // sort the array
     sort(num.begin(), num.end());

     const int count = static_cast<int>(num.size());

     // No negative values at all: the only possible triplet is three zeros.
     // Because the array is sorted and num[0] >= 0, num[2] == 0 implies that
     // num[0] and num[1] are zero as well.
     if (num[0] >= 0) {
         if (num[2] == 0) {
             ret.push_back({0, 0, 0});
         }
         return ret;
     }

     // Index of the first non-negative element (-1 if every value is negative).
     int nzbegin = -1;
     for (int i = 1; i < count; i++) {
         if (num[i] >= 0) {
             nzbegin = i;
             break;
         }
     }
     if (nzbegin == -1) {
         return ret;
     }

     // Largest value is zero: there are no positive values, so the only
     // possible triplet is three zeros. Everything from nzbegin on is zero.
     if (num[count - 1] == 0) {
         if (count - nzbegin >= 3) {
             ret.push_back({0, 0, 0});
         }
         return ret;
     }

     // one negative number, two non-negative numbers
     if (nzbegin < count - 1) {
         for (int i = 0; i < nzbegin; i++) {
             // The array is sorted, so duplicates are adjacent.
             if (i > 0 && num[i] == num[i - 1]) {
                 continue;
             }
             // -INT_MIN is not representable, and no two ints can sum to it.
             if (num[i] == numeric_limits<int>::min()) {
                 continue;
             }

             int match = -num[i];
             two_sum(ret, num, num[i], match, nzbegin, num.size(), true);
         }
     }
     // two negative numbers, one non-negative number
     if (nzbegin > 1) {
         for (int i = nzbegin; i < count; i++) {
             if (i > nzbegin && num[i] == num[i - 1]) {
                 continue;
             }

             int match = -num[i];
             two_sum(ret, num, num[i], match, 0, nzbegin, false);
         }
     }
     // three zeros in an array that also has negative numbers
     if (nzbegin < count - 2 && num[nzbegin + 2] == 0) {
         ret.push_back({0, 0, 0});
     }

     return ret;
 }
Пример #8
0
    // Runs the clsparse csrmv routine that matches T (single or double
    // precision) and checks the device result against a host reference.
    //
    // For each precision the steps are:
    //   1. call clsparseScsrmv / clsparseDcsrmv on the device vectors,
    //   2. update hY on the host row by row:
    //        hY[row] = hBeta * hY[row] + sum_i hAlpha * vals[i] * hX[cols[i]],
    //   3. map gY.values for reading and gather ULP-distance statistics,
    //   4. compare every entry with ASSERT_NEAR using a tolerance that depends
    //      on extended_precision,
    //   5. unmap the buffer.
    // Finally gY is released and recreated from the current hY contents.
    //
    // NOTE(review): a failing ASSERT_* presumably returns from this function
    // right away (gtest fatal assertion - confirm); in that case the mapped
    // buffer is not unmapped and gY is not reset.
    void test_csrmv()
    {
        clsparseStatus status;
        cl_int cl_status;

        clsparseEnableExtendedPrecision(CLSE::control, extended_precision);

        // ---- single precision -------------------------------------------
        if (typeid(T) == typeid(cl_float) )
        {
            status = clsparseScsrmv(&gAlpha, &CSRE::csrSMatrix, &gX,
                                    &gBeta, &gY, CLSE::control);

            ASSERT_EQ(clsparseSuccess, status);

            // Raw views of the host CSR matrix: values, row offsets and
            // column indices.
            float* vals = (float*)&CSRE::ublasSCsr.value_data()[0];
            int* rows = &CSRE::ublasSCsr.index1_data()[0];
            int* cols = &CSRE::ublasSCsr.index2_data()[0];
            for (int row = 0; row < CSRE::n_rows; row++)
            {
                // Summation done at a higher precision to decrease
                // summation errors from rounding.
                hY[row] *= hBeta;
                // NOTE(review): row_end is assigned but never read.
                int row_end = rows[row+1];
                double temp_sum;
                temp_sum = hY[row];
                for (int i = rows[row]; i < rows[row+1]; i++)
                {
                    // Perform: hY[row] += hAlpha * vals[i] * hX[cols[i]];
                    temp_sum += hAlpha * vals[i] * hX[cols[i]];
                }
                hY[row] = temp_sum;
            }

            // Blocking map (CL_TRUE) of the device result for reading.
            T* host_result = (T*) ::clEnqueueMapBuffer(CLSE::queue, gY.values,
                                                       CL_TRUE, CL_MAP_READ,
                                                       0, gY.num_values * sizeof(T),
                                                       0, nullptr, nullptr, &cl_status);
            ASSERT_EQ(CL_SUCCESS, cl_status);

            // ULP-distance statistics between the host reference and the
            // device result; they are only printed, never asserted on.
            uint64_t max_ulps = 0;
            uint64_t min_ulps = UINT64_MAX;
            uint64_t total_ulps = 0;
            for (int i = 0; i < hY.size(); i++)
            {
                long long int intDiff = (long long int)boost::math::float_distance(hY[i], host_result[i]);
                intDiff = llabs(intDiff);
                total_ulps += intDiff;
                if (max_ulps < intDiff)
                    max_ulps = intDiff;
                if (min_ulps > intDiff)
                    min_ulps = intDiff;
                // Debug printouts.
                //std::cout << "Row " << i << " Float Ulps: " << intDiff << std::endl;
                //std::cout.precision(9);
                //std::cout << "\tFloat hY[" << i << "] = " << std::scientific << hY[i] << " (0x" << std::hex << *(uint32_t *)&hY[i] << "), " << std::dec;
                //std::cout << "host_result[" << i << "] = " << std::scientific << host_result[i] << " (0x" << std::hex << *(uint32_t *)&host_result[i] << ")" << std::dec << std::endl;
            }
#ifndef NDEBUG
            // Statistics are reported only in debug builds and only when
            // extended precision is enabled.
            if (extended_precision)
            {
                std::cout << "Float Min ulps: " << min_ulps << std::endl;
                std::cout << "Float Max ulps: " << max_ulps << std::endl;
                std::cout << "Float Total ulps: " << total_ulps << std::endl;
                std::cout << "Float Average ulps: " << (double)total_ulps/(double)hY.size() <<  " (Size: " << hY.size() << ")" << std::endl;
            }
#endif

            // Element-wise check. The tolerance is relative to the reference
            // value (1e-3 with extended precision, 0.1 without) when that
            // value is a normal number, and never smaller than
            // 10*FLT_EPSILON.
            for (int i = 0; i < hY.size(); i++)
            {
                double compare_val = 0.;
                if (extended_precision)
                {
                    // The limit here is somewhat weak because some GPUs don't
                    // support correctly rounded denorms in SPFP mode.
                    if (boost::math::isnormal(hY[i]))
                        compare_val = fabs(hY[i]*1e-3);
                }
                else
                {
                    if (boost::math::isnormal(hY[i]))
                        compare_val = fabs(hY[i]*0.1);
                }
                if (compare_val < 10*FLT_EPSILON)
                    compare_val = 10*FLT_EPSILON;
                ASSERT_NEAR(hY[i], host_result[i], compare_val);
            }

            cl_status = ::clEnqueueUnmapMemObject(CLSE::queue, gY.values,
                                                  host_result, 0, nullptr, nullptr);
            ASSERT_EQ(CL_SUCCESS, cl_status);
        }

        // ---- double precision -------------------------------------------
        if (typeid(T) == typeid(cl_double) )
        {
            status = clsparseDcsrmv(&gAlpha, &CSRE::csrDMatrix, &gX,
                                    &gBeta, &gY, CLSE::control);

            ASSERT_EQ(clsparseSuccess, status);

            // Raw views of the host CSR matrix: values, row offsets and
            // column indices.
            double* vals = (double*)&CSRE::ublasDCsr.value_data()[0];
            int* rows = &CSRE::ublasDCsr.index1_data()[0];
            int* cols = &CSRE::ublasDCsr.index2_data()[0];
            for (int row = 0; row < CSRE::n_rows; row++)
            {
                // Summation done using a compensated summation to decrease
                // summation errors from rounding. This allows us to get
                // smaller errors without requiring quad precision support.
                // This method is like performing summation at quad precision and
                // casting down to double in the end.
                hY[row] *= hBeta;
                // NOTE(review): row_end is assigned but never read.
                int row_end = rows[row+1];
                double temp_sum;
                temp_sum = hY[row];
                // Error term handed to two_sum on every step and added back
                // once after the loop.
                T sumk_err = 0.;
                for (int i = rows[row]; i < rows[row+1]; i++)
                {
                    // Perform: hY[row] += hAlpha * vals[i] * hX[cols[i]];
                    temp_sum = two_sum(temp_sum, hAlpha*vals[i]*hX[cols[i]], &sumk_err);
                }
                hY[row] = temp_sum + sumk_err;
            }

            // Blocking map (CL_TRUE) of the device result for reading.
            T* host_result = (T*) ::clEnqueueMapBuffer(CLSE::queue, gY.values,
                                                       CL_TRUE, CL_MAP_READ,
                                                       0, gY.num_values * sizeof(T),
                                                       0, nullptr, nullptr, &cl_status);
            ASSERT_EQ(CL_SUCCESS, cl_status);

            // ULP-distance statistics between the host reference and the
            // device result; they are only printed, never asserted on.
            uint64_t max_ulps = 0;
            uint64_t min_ulps = ULLONG_MAX;
            uint64_t total_ulps = 0;
            for (int i = 0; i < hY.size(); i++)
            {
                long long int intDiff = (long long int)boost::math::float_distance(hY[i], host_result[i]);
                intDiff = llabs(intDiff);
                total_ulps += intDiff;
                if (max_ulps < intDiff)
                    max_ulps = intDiff;
                if (min_ulps > intDiff)
                    min_ulps = intDiff;
                // Debug printouts.
                //std::cout << "Row " << i << " Double Ulps: " << intDiff << std::endl;
                //std::cout.precision(17);
                //std::cout << "\tDouble hY[" << i << "] = " << std::scientific << hY[i] << " (0x" << std::hex << *(uint64_t *)&hY[i] << "), " << std::dec;
                //std::cout << "host_result[" << i << "] = " << std::scientific << host_result[i] << " (0x" << std::hex << *(uint64_t *)&host_result[i] << ")" << std::dec << std::endl;
            }
            if (extended_precision)
            {
#ifndef NDEBUG
                std::cout << "Double Min ulps: " << min_ulps << std::endl;
                std::cout << "Double Max ulps: " << max_ulps << std::endl;
                std::cout << "Double Total ulps: " << total_ulps << std::endl;
                std::cout << "Double Average ulps: " << (double)total_ulps/(double)hY.size() <<  " (Size: " << hY.size() << ")" << std::endl;
#endif

                // Extended precision: relative tolerance of 1e-14, never
                // smaller than 10*DBL_EPSILON.
                for (int i = 0; i < hY.size(); i++)
                {
                    double compare_val = fabs(hY[i]*1e-14);
                    if (compare_val < 10*DBL_EPSILON)
                        compare_val = 10*DBL_EPSILON;
                    ASSERT_NEAR(hY[i], host_result[i], compare_val);
                }
            }
            else
            {
                // Regular precision: relative tolerance of 0.1 for normal
                // reference values, never smaller than 10*DBL_EPSILON.
                for (int i = 0; i < hY.size(); i++)
                {
                    double compare_val = 0.;
                    if (boost::math::isnormal(hY[i]))
                        compare_val = fabs(hY[i]*0.1);
                    if (compare_val < 10*DBL_EPSILON)
                        compare_val = 10*DBL_EPSILON;
                    ASSERT_NEAR(hY[i], host_result[i], compare_val);
                }
            }

            cl_status = ::clEnqueueUnmapMemObject(CLSE::queue, gY.values,
                                                  host_result, 0, nullptr, nullptr);
            ASSERT_EQ(CL_SUCCESS, cl_status);
        }
        // Reset output buffer for next test.
        // The new buffer is initialised from hY, which at this point holds the
        // host reference result computed above.
        ::clReleaseMemObject(gY.values);
        clsparseInitVector(&gY);
        gY.values = clCreateBuffer(CLSE::context,
                CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
                hY.size() * sizeof(T), hY.data().begin(),
                &cl_status);
        gY.num_values = hY.size();
        ASSERT_EQ(CL_SUCCESS, cl_status);
    }