Пример #1
0
/**
 * Benchmarks the two-argument pow() for element type T.
 *
 * Generates 4*N random (base, exponent) pairs with every value in
 * [1, 1000.9], evaluates pow() on all pairs `rep` times and prints the
 * elapsed time together with the resulting throughput.
 *
 * @param N    one quarter of the number of (base, exponent) pairs
 * @param rep  number of passes over the generated data
 */
void testPow(int N, int rep) {
    cout << "================================ " << endl;
    vector<T>   data1(N*4);
    vector<T>   data2(N*4);

    for (int i = 0; i < 4*N; ++i) {
        data1[i] = 1 + (rand() % 10000) / 10.0;
        data2[i] = 1 + (rand() % 10000) / 10.0;
    }

    WallClockTimer timer;
    // The running sum is printed below ("Ignore: ...") so that the results
    // of the pow() calls are actually consumed.
    T sum = 0;
    for (int j = 0; j < rep; ++j) {
        // Manually unrolled by four; N*4 is always a multiple of four.
        for (int i = 0; i < N*4; i+=4) {
            sum += pow(data1[i],   data2[i]);
            sum += pow(data1[i+1], data2[i+1]);
            sum += pow(data1[i+2], data2[i+2]);
            sum += pow(data1[i+3], data2[i+3]);
        }
    }
    timer.split();
    // NOTE(review): t is presumably in microseconds (it is divided by 1e3 to
    // print milliseconds) -- confirm against WallClockTimer.
    uint64_t t = timer.elapsed();
    // Widen before multiplying: rep * N * 4 evaluated in plain int overflows
    // for large benchmark sizes.
    uint64_t TotalQty = uint64_t(rep) * N * 4;
    cout << "Ignore: " << sum << endl;
    cout << "Pows computed: " << TotalQty << ", time " <<  t / 1e3 << " ms, type: " << typeid(T).name() << endl;
    cout << "Milllions of Pows per sec: " << (float(TotalQty) / t) << endl;

}
Пример #2
0
/**
 * Benchmarks atan() for element type T.
 *
 * Generates 4*N random inputs in [1, 10.999], evaluates atan() on all of them
 * `rep` times and prints the elapsed time and throughput.
 *
 * @param N    one quarter of the number of inputs
 * @param rep  number of passes over the generated data
 */
void testAtan(int N, int rep) {
    vector<T>   data(N*4);

    for (int i = 0; i < 4*N; ++i) {
        data[i] = 1 + (rand() % 10000) / 1000.0;
    }

    WallClockTimer timer;
    // The running sum is printed below ("Ignore: ...") so that the results
    // of the atan() calls are actually consumed.
    T sum = 0;
    for (int j = 0; j < rep; ++j) {
        // Manually unrolled by four; N*4 is always a multiple of four.
        for (int i = 0; i < N*4; i+=4) {
            sum += atan(data[i]);
            sum += atan(data[i+1]);
            sum += atan(data[i+2]);
            sum += atan(data[i+3]);
        }
        // Rescale after every pass so the accumulator stays small.
        sum /= N*4;
    }
    timer.split();
    // NOTE(review): t is presumably in microseconds (it is divided by 1e3 to
    // print milliseconds) -- confirm against WallClockTimer.
    uint64_t t = timer.elapsed();
    // Widen before multiplying: rep * N * 4 evaluated in plain int overflows
    // for large benchmark sizes.
    uint64_t TotalQty = uint64_t(rep) * N * 4;
    cout << "Ignore: " << sum << endl;
    cout << "Atans computed: " << TotalQty << ", time " <<  t / 1e3 << " ms, type: " << typeid(T).name() << endl;
    cout << "Milllions of Atans per sec: " << (float(TotalQty) / t) << endl;

}
Пример #3
0
/**
 * Benchmarks formatting two integers into a stringstream that is constructed
 * once and reset with str(emptyStr) before each use.
 *
 * For every one of the `rep` passes the timer is reset, N strings of the form
 * "<i> <j>" are produced, and the pass duration is added to the total.
 *
 * @param N    number of strings formatted per pass
 * @param rep  number of timed passes
 */
void test3(int N, int rep) {
  WallClockTimer timer;

  uint64_t total = 0;

  // Accumulates the buffer addresses; it is printed below ("Ignore: ...") so
  // that the formatted strings are actually consumed.
  uint64_t sum = 0;

  string       emptyStr;
  stringstream str;

  for (int j = 0; j < rep; ++j) {

    timer.reset();

    for (int i = 0; i < N; i++) {
      str.str(emptyStr);
      str << i << " " << j;
      sum += reinterpret_cast<size_t>(str.str().c_str());
    }

    total += timer.split();
  }

  // Compute the operation count in 64 bits: rep * N evaluated in plain int
  // overflows for large benchmark sizes.
  const int64_t procQty = int64_t(rep) * N;

  cout << "Ignore: " << sum << endl;
  cout << " total # of proc without construct/deconstruct: " << procQty << ", time " <<  total / 1e3 << " ms" << " proc per sec: " << (procQty * 1e6 / total ) << endl;
}
Пример #4
0
void testRoot(int N, size_t MaxRoot, int rep) {
    vector<T>   data(N*4);

    for (int i = 0; i < 4*N; ++i) {
        data[i] = 1 + (rand() % (10 * MaxRoot)) / 10.0;
    }

    WallClockTimer timer;
    T sum = 0;
    for (int j = 0; j < rep; ++j) {
        for (int i = 0; i < N*4; i+=4) {
            sum += sqrt(data[i]); 
            sum += sqrt(data[i+1]); 
            sum += sqrt(data[i+2]); 
            sum += sqrt(data[i+3]); 
        }
    }
    timer.split();
    uint64_t t = timer.elapsed();
    uint64_t TotalQty = uint64_t(rep) * N * 4LL;
    cout << "Ignore: " << sum << endl;
    cout << "max root val.: " << MaxRoot << " Roots computed: " << TotalQty << ", time " <<  t / 1e3 << " ms, type: " << typeid(T).name() << endl;
    cout << "Milllions of Roots per sec: " << (float(TotalQty) / t) << endl;
    
}
Пример #5
0
/**
 * Benchmarks pow() with an integer exponent passed explicitly as `unsigned`.
 *
 * Generates 4*N random bases in [1, 10.999], raises each of them to the power
 * IntExp `rep` times and prints the elapsed time and throughput.
 *
 * @param IntExp  exponent; it is cast to unsigned at every call
 * @param N       one quarter of the number of bases
 * @param rep     number of passes over the generated data
 */
void testIntPowExplicitTemplate(int IntExp, int N, int rep) {
    cout << "================================ " << endl;
    vector<T>   data(N*4);

    for (int i = 0; i < 4*N; ++i) {
        data[i] = 1 + (rand() % 10000) / 1000.0;
    }

    WallClockTimer timer;
    // The running sum is printed below ("Ignore: ...") so that the results
    // of the pow() calls are actually consumed.
    T sum = 0;
    for (int j = 0; j < rep; ++j) {
        // Manually unrolled by four; N*4 is always a multiple of four.
        for (int i = 0; i < N*4; i+=4) {
            sum += pow(data[i],   (unsigned)IntExp);
            sum += pow(data[i+1], (unsigned)IntExp);
            sum += pow(data[i+2], (unsigned)IntExp);
            sum += pow(data[i+3], (unsigned)IntExp);
        }
        // Rescale after every pass so the accumulator stays small.
        sum /= N*4;
    }
    timer.split();
    // NOTE(review): t is presumably in microseconds (it is divided by 1e3 to
    // print milliseconds) -- confirm against WallClockTimer.
    uint64_t t = timer.elapsed();
    // Widen before multiplying: rep * N * 4 evaluated in plain int overflows
    // for large benchmark sizes.
    uint64_t TotalQty = uint64_t(rep) * N * 4;
    cout << "Ignore: " << sum << endl;
    cout << "Pows (expl arguments) computed, degree: " << IntExp << " TotalQty: " << TotalQty << ", time " <<  t / 1e3 << " ms, type: " << typeid(T).name() << endl;
    cout << "Milllions of integer Pows (expl arguments) per sec: " << (float(TotalQty) / t) << endl;

}
Пример #6
0
/**
 * Benchmarks EfficientFractPow() for element type T.
 *
 * Bases are random values in [1, 1000.9]. Exponents are either the constant
 * 1 / 2^DataNumDig (when bRootOnly is set) or a random multiple of
 * 1 / 2^DataNumDig with a numerator below 2^FuncNumDig.
 *
 * @param N           one quarter of the number of (base, exponent) pairs
 * @param rep         number of passes over the generated data
 * @param FuncNumDig  forwarded to EfficientFractPow(); also bounds the random
 *                    exponent numerators by 2^FuncNumDig
 * @param DataNumDig  exponent denominators are 2^DataNumDig
 * @param bRootOnly   use the fixed exponent 1 / 2^DataNumDig for every pair
 */
void testEfficientFractPow(int N, int rep,
                           unsigned FuncNumDig, unsigned DataNumDig,
                           bool bRootOnly) {
    cout << "================================ " << endl;
    vector<T>   data1(N*4);
    vector<T>   data2(N*4);

    uint64_t MaxK = uint64_t(1)<<FuncNumDig;
    uint64_t DataMaxK = uint64_t(1)<<DataNumDig;

    for (int i = 0; i < 4*N; ++i) {
        data1[i] = 1 + (rand() % 10000) / 10.0;
        data2[i] = bRootOnly ? T(1) / T(DataMaxK):(rand() % MaxK) / T(DataMaxK);
    }

    WallClockTimer timer;
    // The running sum is printed below ("Ignore: ...") so that the results
    // of the calls are actually consumed.
    T sum = 0;
    T fract = T(1)/N;
    for (int j = 0; j < rep; ++j) {
        // Manually unrolled by four; N*4 is always a multiple of four.
        for (int i = 0; i < N*4; i+=4) {
            sum += 0.01 * EfficientFractPow(data1[i],   data2[i], FuncNumDig);
            sum += 0.01 * EfficientFractPow(data1[i+1], data2[i+1], FuncNumDig);
            sum += 0.01 * EfficientFractPow(data1[i+2], data2[i+2], FuncNumDig);
            sum += 0.01 * EfficientFractPow(data1[i+3], data2[i+3], FuncNumDig);
        }
        // Rescale after every pass so the accumulator stays small.
        sum *= fract;
    }
    timer.split();
    // NOTE(review): t is presumably in microseconds (it is divided by 1e3 to
    // print milliseconds) -- confirm against WallClockTimer.
    uint64_t t = timer.elapsed();
    // Widen before multiplying: rep * N * 4 evaluated in plain int overflows
    // for large benchmark sizes.
    uint64_t TotalQty = uint64_t(rep) * N * 4;
    cout << "Ignore: " << sum << endl;
    cout << "Pows computed: " << TotalQty << ", time " <<  t / 1e3 << " ms, type: " << typeid(T).name() << endl;
    cout << "Milllions of efficient fract Pows (bRootOnly = "  << bRootOnly << " per sec: " << (float(TotalQty) / t) << " FuncNumDig = " << FuncNumDig << " DataNumDig = " << DataNumDig << endl;

}
Пример #7
0
/**
 * Benchmarks PowOptimPosExp2() for element type T.
 *
 * Generates 4*N random bases in [1, 10.999], calls
 * PowOptimPosExp2(base, IntExp) on each of them `rep` times and prints the
 * elapsed time and throughput.
 *
 * @param IntExp  exponent forwarded to PowOptimPosExp2()
 * @param N       one quarter of the number of bases
 * @param rep     number of passes over the generated data
 */
void testIntPowOptim2(int IntExp, int N, int rep) {
    cout << "================================ " << endl;
    vector<T>   data(N*4);

    for (int i = 0; i < 4*N; ++i) {
        data[i] = 1 + (rand() % 10000) / 1000.0;
    }

    WallClockTimer timer;
    // The running sum is printed below ("Ignore: ...") so that the results
    // of the calls are actually consumed.
    T sum = 0;
    for (int j = 0; j < rep; ++j) {
        // Manually unrolled by four; N*4 is always a multiple of four.
        for (int i = 0; i < N*4; i+=4) {
            sum += PowOptimPosExp2(data[i],   IntExp);
            sum += PowOptimPosExp2(data[i+1], IntExp);
            sum += PowOptimPosExp2(data[i+2], IntExp);
            sum += PowOptimPosExp2(data[i+3], IntExp);
        }
    }
    timer.split();
    // NOTE(review): t is presumably in microseconds (it is divided by 1e3 to
    // print milliseconds) -- confirm against WallClockTimer.
    uint64_t t = timer.elapsed();
    // Widen before multiplying: rep * N * 4 evaluated in plain int overflows
    // for large benchmark sizes.
    uint64_t TotalQty = uint64_t(rep) * N * 4;
    cout << "Ignore: " << sum << endl;
    cout << "Pows (optimized2) computed, degree: " << IntExp << " TotalQty: " << TotalQty << ", time " <<  t / 1e3 << " ms, type: " << typeid(T).name() << endl;
    cout << "Milllions of integer (optimized2) Pows per sec: " << (float(TotalQty) / t) << endl;

}
void test(size_t N ) {
    WallClockTimer time;
    for(int t = 0; t<2;++t) {
      cout <<" test # "<< t<<endl;
      vector<short> data = givemeanarray(N) ;
      vector<short> copydata(data);
      
      time.reset();
      straightsum(&data[0],N);
      cout<<"straight sum (C-like) "<<N/(1000.0*time.split())<<endl;   
      
 
      time.reset();
      slowishSum(data);
      cout<<"basic sum (C++-like) "<<N/(1000.0*time.split())<<endl;   
      
      data = copydata;

      time.reset();
      sum(data);
      cout<<"smarter sum "<<N/(1000.0*time.split())<<endl;   
 
      data = copydata;

      time.reset();
      fastSum(data);
      cout<<"fast sum "<<N/(1000.0*time.split())<<endl;   
 
      cout<<endl<<endl<<endl;

    }

}
int overall(size_t N) {
	int bogus = 0;
	WallClockTimer t;
	t.reset();
    bogus += testSTL(N);
    int delay = t.split();
    cout << "STL vector " << N /(delay * 1000.0) << endl;
    vector<double> idelays;
    for(size_t T = 0 ; T < 20 ; ++T ) {
          t.reset();
    	  bogus += straight(N);
    	  int tdelay = t.split();
    	  idelays.push_back(tdelay);
    }
    cout << "static array : " << N /(median(idelays) * 1000.0) << endl;
    for(size_t factor = 1; factor <= 6; ++ factor) {
        vector<double> delays;
        for(size_t T = 0 ; T < 20 ; ++T ) {
          t.reset();
    	  bogus += testManual(N,2+factor,2);
    	  int tdelay = t.split();
    	  delays.push_back(tdelay);
        }
    	cout << "pointer-based "<< (factor +2)/2.0<< " : " << N /(median(delays) * 1000.0) << endl;
    }
    return bogus;
}
/**
 * Compares totalsum() against sum<2>() and sum<16>() on an N-element int
 * array and prints the best of 20 timings for each.
 *
 * @param N  number of ints in the test buffer
 * @return   accumulated results of all summation calls
 */
int test(const size_t N) {
	int *  a = new int[N];
	for(size_t k = 0; k< N; ++k)
	  a[k] = k - 2 + k * k;
	int fakecounter = 0;
	cout<<" Buffer size = "<< N*sizeof(int) /(1024.0*1024.0)<<" MB "<<endl;

	WallClockTimer t;
	// Keep the minimum over all repetitions of each variant.
	double besttime1 = numeric_limits<double>::max();
	double besttime2 = numeric_limits<double>::max();
	double besttime3 = numeric_limits<double>::max();
	for(int k = 0; k<20;++k) {
		t.reset();
		fakecounter += totalsum(a,N);
		double thistime1 = t.split();
		if(thistime1 < besttime1) besttime1 = thistime1;
		t.reset();
		fakecounter += sum<2>(a,N);
		double thistime2 = t.split();
		if(thistime2 < besttime2) besttime2 = thistime2;
		t.reset();
		fakecounter += sum<16>(a,N);
		double thistime3 = t.split();
		if(thistime3 < besttime3) besttime3 = thistime3;
	}
	// The buffer is no longer needed; the original never released it.
	delete[] a;
    cout<<" total sum speed = "<<N/(1000*1000*besttime1) <<" mis or "<< N*sizeof(int)/(1024.0*1024.0*besttime1)<<" MB/s"<<endl;
    cout<<" partial sum speed = "<<N/(1000*1000*besttime2) <<" mis or "<< N*sizeof(int)/(1024.0*1024.0*besttime2)<<" MB/s"<<endl;
    cout<<" speed ratio = "<< besttime1 /besttime2<<endl;
    // This second "partial sum" line reports the sum<16> timing.
    cout<<" partial sum speed = "<<N/(1000*1000*besttime3) <<" mis or "<< N*sizeof(int)/(1024.0*1024.0*besttime3)<<" MB/s"<<endl;
    cout<<" speed ratio = "<< besttime1 /besttime3<<endl;
    return fakecounter;
}
/**
 * Times pack() and unpack() on an array of N bools with an alternating
 * false/true pattern, repeating the round trip three times and checking that
 * the pattern survives.
 *
 * @param N  number of bools to pack
 */
void testPackUnpackC(size_t N =  2048 * 32 * 2048) {
	WallClockTimer timer;
	bool* data = new bool[N];
	for(size_t i = 0; i<N; ++i)
	  data[i] = static_cast<bool>(i & 1);
	// Round the byte count up so that a trailing partial byte has room when N
	// is not a multiple of 8.
	// NOTE(review): assumes pack() stores eight bools per byte -- confirm.
	vector<char> comp((N + 7) / 8);
	for(size_t t = 0; t< 3; ++t) {
		timer.reset();
		pack(data, &comp[0], N);
		cout<<" pack time = "<<timer.split()<<endl;
		timer.reset();
		unpack(&comp[0], data, N);
		cout<<" unpack time = "<<timer.split()<<endl;
		// Verify the round trip (compiled out when NDEBUG is defined).
		for(size_t i = 0; i<N; ++i)
			assert(data[i] == static_cast<bool>(i & 1));
	}
	delete[] data;
}
/**
 * Times storeTestC() and loadTestC() between an M-element source array and
 * an M*N-element buffer.
 *
 * Three rounds are run; timings are printed only for rounds after the first.
 *
 * @param M       length of the small array (filled with 0 .. M-1)
 * @param N       the large buffer holds M * N ints
 * @param repeat  number of store/load calls per timed section
 * @return        accumulated results of all store/load calls
 */
int testStoreLoadC(size_t M =  2048 * 4, size_t N = 2048 * 8, size_t repeat = 1) {
	WallClockTimer timer;
	vector<int> data;
	// Must start at zero: it is only ever updated with +=, and the original
	// read it uninitialized (undefined behaviour).
	int bogus = 0;
	for(size_t i = 0; i<M; ++i)
	  data.push_back(i);
	vector<int> bigdata;
	bigdata.resize(M * N);
	for(size_t t = 0; t< 3; ++t) {
		timer.reset();
		for (size_t r = 0; r < repeat; ++r)
			bogus += storeTestC(&data[0],&bigdata[0],N,M);
		if(t>0) cout<<" store time = "<<timer.split()<<endl;
		timer.reset();
		for (size_t r = 0; r < repeat; ++r)
			bogus += loadTestC(&data[0],&bigdata[0],N,M);
		if(t>0) cout<<" load time = "<<timer.split()<<endl;
		// The small array must still hold 0 .. M-1 after the round trip.
		// Index with size_t to avoid the signed/unsigned comparison.
		for(size_t i = 0; i<M; ++i)
			assert(data[i] == static_cast<int>(i));
	}
    return bogus;
}
void test(size_t N ) {
    cout << "min distance between ints is "<<mindist<<endl;
    WallClockTimer time;
    for(int t = 0; t<2;++t) {
      cout <<" test # "<< t<<endl;
      vector<int> data = givemeanarray(N) ;
      vector<int> copydata(data);
      
      time.reset();
      cdelta<mindist>(&data[0],data.size());
      cout<<"c delta speed "<<N/(1000.0*time.split())<<endl;   
      time.reset();
      cinverseDelta<mindist>(&data[0],data.size());
      cout<<"c inverse delta speed "<<N/(1000.0*time.split())<<endl;   
      if(data != copydata) throw runtime_error("bug!");
      cout<<endl;
 
      time.reset();
      delta<mindist>(data);
      cout<<"delta speed "<<N/(1000.0*time.split())<<endl;   
      time.reset();
      inverseDelta<mindist>(data);
      cout<<"inverse delta speed "<<N/(1000.0*time.split())<<endl;   
      if(data != copydata) throw runtime_error("bug!");
      cout<<endl;


      delta<mindist>(data);
      time.reset();
      slowishinverseDelta<mindist>(data);
      cout<<"slowish inverse delta speed "<<N/(1000.0*time.split())<<endl;   
      if(data != copydata) throw runtime_error("bug!");
      cout<<endl;

      delta<mindist>(data);
      time.reset();
      bufferedinverseDelta<mindist>(data);
      cout<<"buffered inverse delta speed "<<N/(1000.0*time.split())<<endl;   
      if(data != copydata) throw runtime_error("bug!");
      cout<<endl;


      delta<mindist>(data);
      time.reset();
      inverseDeltaVolkov<mindist>(data);
      cout<<"inverse delta speed (volkov-lemire) "<<N/(1000.0*time.split())<<endl;   
      if(data != copydata) throw runtime_error("bug!");
      cout<<endl;


      cout<<endl<<endl<<endl;
    }

}
Пример #14
0
int main(int argc, char * argv[]) {
  std::string usage = EXECUTABLE " in LTP " LTP_VERSION " - " LTP_COPYRIGHT "\n";
  usage += DESCRIPTION "\n\n";
  usage += "usage: ./" EXECUTABLE " <options>\n\n";
  usage += "options";

  options_description optparser = options_description(usage);
  optparser.add_options()
    ("threads", value<int>(), "The number of threads [default=1].")
    ("input", value<std::string>(), "The path to the input file. "
     "Input data should contain one sentence each line. "
     "Words should be separated by space with POS tag appended by "
     "'_' (e.g. \"w1_p1 w2_p2 w3_p3 w4_p4\").")
    ("ner-model", value<std::string>(),
     "The path to the postag model [default=ltp_data/ner.model].")
    ("help,h", "Show help information");

  if (argc == 1) {
    std::cerr << optparser << std::endl;
    return 1;
  }

  variables_map vm;
  store(parse_command_line(argc, argv, optparser), vm);

  if (vm.count("help")) {
    std::cerr << optparser << std::endl;
    return 0;
  }

  int threads = 1;
  if (vm.count("threads")) {
    threads = vm["threads"].as<int>();
    if (threads < 0) {
      std::cerr << "number of threads should not less than 0, reset to 1." << std::endl;
      threads = 1;
    }
  }

  std::string input = "";
  if (vm.count("input")) { input = vm["input"].as<std::string>(); }

  std::string ner_model = "ltp_data/ner.model";
  if (vm.count("ner-model")) {
    ner_model = vm["ner-model"].as<std::string>();
  }

  void *engine = ner_create_recognizer(ner_model.c_str());
  if (!engine) {
    return 1;
  }

  std::cerr << "TRACE: Model is loaded" << std::endl;
  std::cerr << "TRACE: Running " << threads << " thread(s)" << std::endl;

  std::ifstream ifs(input.c_str());
  std::istream* is = NULL;

  if (!ifs.good()) {
    std::cerr << "WARN: Cann't open file! use stdin instead." << std::endl;
    is = (&std::cin);
  } else {
    is = (&ifs);
  }

  Dispatcher * dispatcher = new Dispatcher( engine, (*is), std::cout );
  WallClockTimer t;
  std::list<tthread::thread *> thread_list;
  for (int i = 0; i < threads; ++ i) {
    tthread::thread * t = new tthread::thread( multithreaded_recognize, (void *)dispatcher );
    thread_list.push_back( t );
  }

  for (std::list<tthread::thread *>::iterator i = thread_list.begin();
      i != thread_list.end(); ++ i) {
    tthread::thread * t = *i;
    t->join();
    delete t;
  }

  std::cerr << "TRACE: consume " << t.elapsed() << " seconds." << std::endl;
  delete dispatcher;
  ner_release_recognizer(engine);
  return 0;
}
int main() {
    assert(sizeof(long)==8);
    assert(sizeof(int)==4);
    WallClockTimer timer;
    int repeat = 100;
    int N = 10000;
    cout<<"# We report bits-per-integer speed-of-naive speed-of-popcnt1 speed-of-popcnt2 speed-of-table speed-of-tzcnt1 speed-of-tzcnt2 where speeds are in millions of integers per second "<<endl;
    for(int sb = 1; sb<=64; sb*=2) {
        int setbitsmax = sb*N;
        vector<long> bitmap(N);
        for (int k = 0; k < setbitsmax; ++k) {
            int bit = rand() % (N*64);
            bitmap[bit/64] |= (1L<<(bit%64));
        }
        int bitcount = 0;
        for(int k = 0; k <N; ++k) {
            bitcount += __builtin_popcountl(bitmap[k]);
        }
        double bitsperinteger = N*sizeof(long)*8.0/bitcount;
        vector<int> outputnaive(bitcount);
        vector<int> outputpopcnt1(bitcount);
        vector<int> outputpopcnt2(bitcount);
        vector<int> outputtable(bitcount);
        vector<int> outputctz1(bitcount);
        vector<int> outputctz2(bitcount);
        cout<<"# Stored "<<bitcount<<" unary numbers in  ";
        cout<< N*sizeof(long)<<" bytes " ;
        cout<<" ("<<bitsperinteger<<" bits per number)"<<endl;
        timer.reset();
        int c0 = 0;
        for(int t1=0; t1<repeat; ++t1)
            c0 = bitscanunary_naive(bitmap.data(),N,outputnaive.data());
        int tinaive = timer.split();
        timer.reset();
        int c1 = 0;
        for(int t1=0; t1<repeat; ++t1)
            c1 = bitscanunary_popcnt1(bitmap.data(),N,outputpopcnt1.data());
        assert(c1 == c0);
        int tipopcnt1 = timer.split();
        timer.reset();
        int c12 = 0;
        for(int t1=0; t1<repeat; ++t1)
            c12 = bitscanunary_popcnt2(bitmap.data(),N,outputpopcnt2.data());
        assert(c12 == c0);
        int tipopcnt2 = timer.split();
        timer.reset();
        int c2 = 0;
        for(int t1=0; t1<repeat; ++t1)
            c2 = bitscanunary_table(bitmap.data(),N,outputtable.data());
        assert(c2 == c0);
        int titable = timer.split();
        timer.reset();
        int c3 = 0;
        for(int t1=0; t1<repeat; ++t1)
            c3 = bitscanunary_ctzl1(bitmap.data(),N,outputctz1.data());
        assert(c3 == c0);
        int tictz1 = timer.split();
        timer.reset();
        int c32 = 0;
        for(int t1=0; t1<repeat; ++t1)
            c32 = bitscanunary_ctzl2(bitmap.data(),N,outputctz2.data());
        assert(c32 == c0);
        int tictz2 = timer.split();

        assert (outputnaive == outputpopcnt1);
        assert (outputnaive == outputpopcnt2);
        assert (outputnaive == outputtable);
        assert (outputnaive == outputctz1);
        assert (outputnaive == outputctz2);        
        cout << bitsperinteger<<" " ;
        cout << bitcount * repeat * 0.001 /tinaive <<" ";
        cout << bitcount * repeat * 0.001 /tipopcnt1 <<" ";
        cout << bitcount * repeat * 0.001 /tipopcnt2 <<" ";
        cout << bitcount * repeat * 0.001 /titable <<" ";
        cout << bitcount * repeat * 0.001 /tictz1 <<" ";
        cout << bitcount * repeat * 0.001 /tictz2 <<" ";
        cout << endl ;
    }

    return 0;
}
Пример #16
0
/**
 * Blocks until at least `size` bytes are cached, the transfer stops running,
 * or no progress has been made for the hard-coded user timeout.
 *
 * Repeatedly calls fillCacheNonBlocking() and waits on the curl multi
 * handle's file descriptors with select() in between. processMessages() is
 * called only when the loop ends normally; the timeout paths return without
 * calling it.
 *
 * @param size  required number of cached bytes; must be non-negative
 * @throws SnailException if curl_multi_fdset() fails or select() reports an
 *         error other than EINTR (the latter on non-Windows builds only)
 */
void CurlStreamFile::fillCache(std::streampos size){
#if 1
	assert(size >= 0);
	if(! _running || _cached >=size){
		return ;
	}
	fd_set readfd, writefd, exceptfd;
	int maxfd;
	CURLMcode mcode;
	timeval tv;
	// Hard-coded select timeout.
	// This number is kept low to give more thread-switch
	// opportunities while waiting for a load.
	const long maxSleepUsec = 10000; // 1/100 of a second

	// NOTE(review): presumably milliseconds -- confirm against the unit
	// returned by WallClockTimer::elapsed().
	const unsigned int userTimeout = 60000;
	WallClockTimer lastProgress;
	while(_running){
		fillCacheNonBlocking();
		if(_cached>=size || !_running) break;

		FD_ZERO(&readfd);
		FD_ZERO(&writefd);
		FD_ZERO(&exceptfd);
		mcode = curl_multi_fdset(_mCurlHandle, &readfd, &writefd,
				&exceptfd, &maxfd);
		if(mcode != CURLM_OK){
			throw SnailException(curl_multi_strerror(mcode));
		}
		if(maxfd<0){
			// As of libcurl 7.21.x, DNS resolving appears to be
			// going on in the background, so curl_multi_fdset fails to
			// return anything useful. So we use the user timeout value
			// to give DNS enough time to resolve the lookup.
			if(userTimeout && lastProgress.elapsed()>userTimeout){
				return ;
			}
#if !defined(WIN32)
			// There is nothing to select() on yet. Sleep briefly instead
			// of spinning at full speed, as the libcurl documentation
			// advises when max_fd is -1. The return value is ignored on
			// purpose: an interruption merely shortens the nap.
			tv.tv_sec = 0;
			tv.tv_usec = maxSleepUsec;
			select(0, NULL, NULL, NULL, &tv);
#endif
			continue;
		}//if(maxfd<0)
		tv.tv_sec = 0;
		tv.tv_usec = maxSleepUsec;
		// Wait for activity on the file descriptors for at most maxSleepUsec.
		int ret = select(maxfd+1, &readfd, &writefd, &exceptfd, &tv);
#if !defined(WIN32)
		if(ret == -1){
			if(errno == EINTR){
				// Treat an interrupted wait like a timeout.
				cout<<"select() was interrupted by a singal"<<endl;
				ret = 0;
			}else{
				std::ostringstream os;
				os<<"error polling data from connection to"<<_url<<":"<<strerror(errno);
				throw SnailException(os.str());
			}
		}
#endif
		if(!ret){
			// Timeout: check the clock to see
			// whether the user timeout has expired.
			if(userTimeout && lastProgress.elapsed() > userTimeout){
				cout<<"timeout ("<<userTimeout<<") while loading from URL"<<_url<<endl;
				return ;
			}
		}else{
			// Some descriptor became ready: restart the inactivity clock.
			lastProgress.restart();
		}
	}//while(....
	processMessages();
#endif
}
int main(int argc, char **argv) {
  size_t howmany = 0;
  size_t loop = 3;
  bool uniform = false;
  uint32_t Big = 22;
  float intersectionratio = 0.3f;
  uint32_t MaxBit = 26;
  int c;
  while ((c = getopt(argc, argv, "uns:m:R:M:S:l:h")) != -1)
    switch (c) {
    case 'h':
      printusage();
      return 0;
    case 'S':
      Big = atoi(optarg);
      break;
    case 'R':
      intersectionratio = atof(optarg);
      break;
    case 'M':
      MaxBit = atoi(optarg);
      if (MaxBit < 1) {
        printusage();
        return -1;
      }
      break;
    case 'm':
      howmany = atoi(optarg);
      if (howmany < 1) {
        printusage();
        return -1;
      }
      break;
    case 'l':
      loop = atoi(optarg);
      if (loop < 1) {
        printusage();
        return -1;
      }
      break;
    case 'u':
      uniform = true;
      break;
    default:
      abort();
    }
  if (howmany == 0) {
    howmany = 5;
  }
  cout << "# howmany : " << howmany << endl;
  cout << "# loop : " << loop << endl;
  cout << "# distribution : " << (uniform ? "uniform" : "clustered") << endl;
  cout << "# Big : " << Big << endl;
  cout << "# intersectionratio : " << intersectionratio << endl;
  cout << "# MaxBit : " << MaxBit << endl;
  UniformDataGenerator udg;
  ClusteredDataGenerator cdg;
  WallClockTimer z;
  size_t bogus = 0;
  vector<uint32_t> buffer(2 * (1U << Big));
#ifdef LIKWID_MARKERS
  char currentMarker[64];
  likwid_markerInit();
#endif

  cout << "# size-ratio\t";
  for (string intername : IntersectionFactory::allNames()) {
    cout << intername << "\t";
  }
  cout << " partioned (Schlegel et al.: improved, original) 16-bitV1 "
          "16-bitscalar ";
  cout << "relative-intersection-size " << endl;

  for (float ir = 1.001; ir <= 10000; ir = ir * sqrt(1.9)) {
    vector<pair<vector<uint32_t>, vector<uint32_t>>> data(howmany);
    uint32_t smallsize =
        static_cast<uint32_t>(round(static_cast<float>(1 << Big) / ir));
    cout << "#generating data...";
    cout.flush();
    for (size_t k = 0; k < howmany; ++k) {
      data[k] = uniform ? getNaivePair(udg, smallsize, 1U << MaxBit, ir,
                                       intersectionratio)
                        : getNaivePair(cdg, smallsize, 1U << MaxBit, ir,
                                       intersectionratio);
    }
    cout << "ok." << endl;
    cout << "#partitions...";
    vector<pair<vector<uint16_t>, vector<uint16_t>>> datapart(howmany);
    for (size_t k = 0; k < howmany; ++k) {
      vector<uint16_t> part1(data[k].first.size() * 4);
      size_t p1length = partition(data[k].first.data(), data[k].first.size(),
                                  part1.data(), part1.size());
      part1.resize(p1length);
      part1.shrink_to_fit();
      vector<uint16_t> part2(data[k].second.size() * 4);
      size_t p2length = partition(data[k].second.data(), data[k].second.size(),
                                  part2.data(), part2.size());
      part2.resize(p2length);
      part2.shrink_to_fit();
      datapart[k] = make_pair(part1, part2);
    }
    cout << "ok." << endl;

    cout << ir << "\t";
    float aratio = 0.0f;
    for (string intername : IntersectionFactory::allNames()) {
      intersectionfunction interfnc =
          IntersectionFactory::getFromName(intername);
      size_t volume = 0;
#ifdef LIKWID_MARKERS
      snprintf(currentMarker, sizeof(currentMarker), "%s %.2f",
               intername.c_str(), ir);
      likwid_markerStartRegion(currentMarker);
#endif
      z.reset();
      for (size_t k = 0; k < data.size(); ++k) {
        volume += (data[k].first.size() + data[k].second.size()) * loop;
        for (size_t L = 0; L < loop; ++L) {
          aratio = interfnc(data[k].first.data(), (data[k].first).size(),
                            data[k].second.data(), (data[k].second).size(),
                            buffer.data());
          bogus += aratio;
        }
      }
      cout << setw(10) << setprecision(5)
           << (volume / (static_cast<double>(z.split()))) << "\t";
#ifdef LIKWID_MARKERS
      likwid_markerStopRegion(currentMarker);
#endif
    }
    z.reset();
    size_t volume = 0;
    for (size_t k = 0; k < data.size(); ++k) {
      volume += (data[k].first.size() + data[k].second.size()) * loop;
      for (size_t L = 0; L < loop; ++L) {
        aratio = intersect_partitioned(
            datapart[k].first.data(), (datapart[k].first).size(),
            datapart[k].second.data(), (datapart[k].second).size(),
            (uint16_t *)buffer.data());
        bogus += aratio;
      }
    }
    cout << setw(10) << setprecision(5)
         << (volume / (static_cast<double>(z.split()))) << "\t";
    z.reset();
    volume = 0;
    for (size_t k = 0; k < data.size(); ++k) {
      volume += (data[k].first.size() + data[k].second.size()) * loop;
      for (size_t L = 0; L < loop; ++L) {
        aratio = original_intersect_partitioned(
            datapart[k].first.data(), (datapart[k].first).size(),
            datapart[k].second.data(), (datapart[k].second).size(),
            (uint16_t *)buffer.data());
        bogus += aratio;
      }
    }
    cout << setw(10) << setprecision(5)
         << (volume / (static_cast<double>(z.split()))) << "\t";
    z.reset();
    volume = 0;
    for (size_t k = 0; k < data.size(); ++k) {
      volume += (data[k].first.size() + data[k].second.size()) * loop;
      for (size_t L = 0; L < loop; ++L) {
        aratio = intersect_partitionedV1(
            datapart[k].first.data(), (datapart[k].first).size(),
            datapart[k].second.data(), (datapart[k].second).size(),
            (uint16_t *)buffer.data());
        bogus += aratio;
      }
    }
    cout << setw(10) << setprecision(5)
         << (volume / (static_cast<double>(z.split()))) << "\t";
    z.reset();
    volume = 0;
    for (size_t k = 0; k < data.size(); ++k) {
      volume += (data[k].first.size() + data[k].second.size()) * loop;
      for (size_t L = 0; L < loop; ++L) {
        aratio = intersect_partitionedscalar(
            datapart[k].first.data(), (datapart[k].first).size(),
            datapart[k].second.data(), (datapart[k].second).size(),
            (uint16_t *)buffer.data());
        bogus += aratio;
      }
    }
    cout << setw(10) << setprecision(5)
         << (volume / (static_cast<double>(z.split()))) << "\t";
    cout << "\t\t" << aratio / smallsize;
    cout << endl;
  }
#ifdef LIKWID_MARKERS
  likwid_markerClose();
#endif

  cout << "# bogus = " << bogus << endl;
}
/**
 * Benchmarks several inverse-delta implementations, including two SIMD
 * variants, on an N-element int array. The whole comparison is run twice.
 *
 * Every section delta-encodes the data, times the inverse transform and
 * checks that the original values are restored. The vector-based sections
 * work in place on `data`; the SIMD sections work on a 16-byte-aligned copy
 * and leave `data` untouched.
 *
 * @param N  number of elements requested from givemeanarray()
 * @throws runtime_error if a round trip does not restore the input or the
 *         aligned buffer cannot be allocated
 */
void test(size_t N ) {
    WallClockTimer time;
    for(int t = 0; t<2;++t) {
      cout <<" test # "<< t<<endl;
      vector<int> data = givemeanarray(N) ;


      {
          vector<int> copydata(data);

          cout << "min distance between ints is "<<mindist<<endl;

          time.reset();
          delta<mindist>(data);
          cout<<"delta speed "<<N/(1000.0*time.split())<<endl;
          time.reset();
          slowishinverseDelta1<mindist>(data);
          cout<<"Slowish(1) inverse delta speed "<<N/(1000.0*time.split())<<endl;
          if(data != copydata) throw runtime_error("bug!");
          cout<<endl;

      }

      {
          vector<int> copydata(data);

          cout << "min distance between ints is "<<mindist<<endl;

          time.reset();
          delta<mindist>(data);
          cout<<"delta speed "<<N/(1000.0*time.split())<<endl;
          time.reset();
          slowishinverseDelta2<mindist>(data);
          cout<<"Slowish(2) inverse delta speed "<<N/(1000.0*time.split())<<endl;
          if(data != copydata) throw runtime_error("bug!");
          cout<<endl;

      }

      {
          vector<int> copydata(data);

          cout << "min distance between ints is "<<mindist<<endl;

          time.reset();
          delta<mindist>(data);
          cout<<"delta speed "<<N/(1000.0*time.split())<<endl;
          time.reset();
          inverseDelta<mindist>(data);
          cout<<"Unroll2 inverse delta speed "<<N/(1000.0*time.split())<<endl;
          if(data != copydata) throw runtime_error("bug!");
          cout<<endl;

      }

      {
          vector<int> copydata(data);

          cout << "min distance between ints is "<<mindist<<endl;

          time.reset();
          delta<mindist>(data);
          cout<<"delta speed "<<N/(1000.0*time.split())<<endl;
          time.reset();
          inverseDeltaMem<mindist>(data);
          cout<<"Unroll2 (mem) inverse delta speed "<<N/(1000.0*time.split())<<endl;
          if(data != copydata) throw runtime_error("bug!");
          cout<<endl;

      }




      {
          vector<int> copydata(data);

          cout << "min distance between ints is "<<mindist<<endl;

          time.reset();
          delta<mindist>(data);
          cout<<"delta speed "<<N/(1000.0*time.split())<<endl;
          time.reset();
          inverseDeltaMy1<mindist>(data);
          cout<<"My1 inverse delta speed "<<N/(1000.0*time.split())<<endl;
          if(data != copydata) throw runtime_error("bug!");
          cout<<endl;

      }

#if 0
      {
          vector<int> copydata(data);

          cout << "min distance between ints is "<<mindist<<endl;

          time.reset();
          delta<mindist>(data);
          cout<<"delta speed "<<N/(1000.0*time.split())<<endl;
          time.reset();
          inverseDeltaMy2<mindist>(data);
          cout<<"My2 inverse delta speed "<<N/(1000.0*time.split())<<endl;
          if(data != copydata) throw runtime_error("bug!");
          cout<<endl;
      }


      {
          vector<int> copydata(data);

          cout << "min distance between ints is "<<mindist<<endl;

          time.reset();
          delta<mindist>(data);
          cout<<"delta speed "<<N/(1000.0*time.split())<<endl;
          time.reset();
          inverseDeltaMy3<mindist>(data);
          cout<<"My3 inverse delta speed "<<N/(1000.0*time.split())<<endl;
          if(data != copydata) throw runtime_error("bug!");
          cout<<endl;
      }
#endif


      {
          // The SIMD routines operate on a 16-byte-aligned copy of the input.
          int* pCopyData = (int*)memalign(16, data.size() * sizeof data[0]);
          if (!pCopyData) {
            throw runtime_error("Not enough memory");
          }
          memcpy(pCopyData, &data[0], data.size() * sizeof data[0]);

          cout << "min distance between ints is "<<mindist<<endl;

          time.reset();
          deltaForSIMD<mindist>(pCopyData, data.size());
          cout<<"for SIMD delta speed "<<N/(1000.0*time.split())<<endl;
          time.reset();
          inverseDeltaSIMD<mindist>(pCopyData, data.size());
          cout<<"SIMD inverse delta speed "<<N/(1000.0*time.split())<<endl;
          for (size_t i = 0; i < data.size(); ++i) {
            if (data[i] != pCopyData[i]) {
                cerr << "Elem index: " << i << " orig: " << data[i] << " obtained: " << pCopyData[i] << endl;
                // Release the aligned buffer before leaving via exception;
                // nothing else owns it.
                free(pCopyData);
                throw runtime_error("bug");
             }
          }
          free(pCopyData);
          cout<<endl;
      }

      {
          // Same as above, for the unrolled SIMD inverse.
          int* pCopyData = (int*)memalign(16, data.size() * sizeof data[0]);
          if (!pCopyData) {
            throw runtime_error("Not enough memory");
          }
          memcpy(pCopyData, &data[0], data.size() * sizeof data[0]);

          cout << "min distance between ints is "<<mindist<<endl;

          time.reset();
          deltaForSIMD<mindist>(pCopyData, data.size());
          cout<<"for SIMD delta speed "<<N/(1000.0*time.split())<<endl;
          time.reset();
          inverseDeltaSIMDUnrolled<mindist>(pCopyData, data.size());
          cout<<"SIMD inverse UNROLLED delta speed "<<N/(1000.0*time.split())<<endl;
          for (size_t i = 0; i < data.size(); ++i) {
            if (data[i] != pCopyData[i]) {
                cerr << "Elem index: " << i << " orig: " << data[i] << " obtained: " << pCopyData[i] << endl;
                // Release the aligned buffer before leaving via exception;
                // nothing else owns it.
                free(pCopyData);
                throw runtime_error("bug");
             }
          }
          free(pCopyData);
          cout<<endl;
      }


      cout<<endl<<endl<<endl;
    }
}
Пример #19
0
/**
 * Benchmarks the scalar (fastpack / fastpackwithoutmask / fastunpack), SIMD
 * (simdpack / simdpackwithoutmask / simdunpack) and horizontalunpack
 * bit-packing routines for every bit width from 32 down to 1, printing one
 * line of throughput figures per bit width to cout.
 *
 * Every unpack result is compared against the input with equalOnFirstBits();
 * on the first mismatch a message is printed and the function returns.
 *
 * @param N number of 32-bit integers in the test array.
 * @param T number of timed passes per bit width. One extra pass (t == 0) is
 *          run first and excluded from the timings as a warm-up.
 */
void simplebenchmark(uint32_t N = 1U << 16, uint32_t T = 1U << 9) {
    T = T + 1; // we have a warming up pass
    vector<uint32_t, cacheallocator> data = generateArray32(N);
    vector<uint32_t, cacheallocator> compressed(N, 0);
    vector<uint32_t, cacheallocator> recovered(N, 0);
    WallClockTimer z;

    // Integers processed over the timed passes. Computed in double because
    // the 32-bit product N * (T - 1) wraps around for large N and T.
    const double timedInts = static_cast<double>(N) * (T - 1);

    // The figures are labelled "million of integers per second", which
    // presumably relies on WallClockTimer::split() returning microseconds
    // -- TODO confirm against the timer implementation.
    cout << "#million of integers per second: higher is better" << endl;
    cout << "#bit, pack, pack without mask, unpack" << endl;
    for (uint32_t bitindex = 0; bitindex < 32; ++bitindex) {
        const uint32_t bit = 32 - bitindex;
        maskfnc(data, bit);
        for (uint32_t repeat = 0; repeat < 1; ++repeat) {
            double packtime = 0;
            double packtimewm = 0;
            double unpacktime = 0;
            double simdpacktime = 0;
            double simdpacktimewm = 0;
            double simdunpacktime = 0;
            double horizontalunpacktime = 0;

            for (uint32_t t = 0; t < T; ++t) {
                compressed.clear();
                // Widen before multiplying: N * bit overflows 32 bits once
                // N reaches 2^27 (with bit == 32).
                compressed.resize(static_cast<size_t>(
                        static_cast<uint64_t>(N) * bit / 32), 0);
                recovered.clear();
                recovered.resize(N, 0);

                // Untimed SIMD round trip as a sanity check.
                simdpack(data, compressed, bit);
                simdunpack(compressed, recovered, bit);
                if (!equalOnFirstBits(data, recovered, bit)) {
                    cout << " Bugs!" << bit << endl;
                    return;
                }

                // Timed: SIMD pack.
                z.reset();
                simdpack(data, compressed, bit);
                if (t > 0)
                    simdpacktime += z.split();
                simdunpack(compressed, recovered, bit);
                if (!equalOnFirstBits(data, recovered, bit)) {
                    cout << " Bugs!" << bit << endl;
                    return;
                }

                // Timed: SIMD pack without masking.
                z.reset();
                simdpackwithoutmask(data, compressed, bit);
                if (t > 0)
                    simdpacktimewm += z.split();

                // Timed: SIMD unpack (of the unmasked pack output).
                z.reset();
                simdunpack(compressed, recovered, bit);
                if (t > 0)
                    simdunpacktime += z.split();

                if (!equalOnFirstBits(data, recovered, bit)) {
                    cout << " Bugs!" << bit << endl;
                    return;
                }

                // Timed: scalar pack.
                z.reset();
                fastpack(data, compressed, bit);
                if (t > 0)
                    packtime += z.split();
                fastunpack(compressed, recovered, bit);
                if (!equalOnFirstBits(data, recovered, bit)) {
                    cout << " Bug1!" << endl;
                    return;
                }

                // Timed: scalar pack without masking.
                z.reset();
                fastpackwithoutmask(data, compressed, bit);
                if (t > 0)
                    packtimewm += z.split();

                // Timed: scalar unpack (of the unmasked pack output).
                z.reset();
                fastunpack(compressed, recovered, bit);
                if (t > 0)
                    unpacktime += z.split();

                if (!equalOnFirstBits(data, recovered, bit)) {
                    cout << " Bug1!" << endl;
                    return;
                }

                // Timed: horizontal unpack of the same scalar-packed buffer.
                z.reset();
                horizontalunpack(compressed, recovered, bit);
                if (t > 0)
                    horizontalunpacktime += z.split();

                if (!equalOnFirstBits(data, recovered, bit)) {
                    cout << " Bug1!" << endl;
                    return;
                }

            }

            // Columns: bit, pack, pack w/o mask, unpack,
            //          bit, SIMD pack, SIMD pack w/o mask, SIMD unpack,
            //          horizontal unpack.
            cout << std::setprecision(4) << bit << "\t\t"
                    << timedInts / (packtime) << "\t\t"
                    << timedInts / (packtimewm) << "\t\t\t"
                    << timedInts / (unpacktime) << "\t\t";

            cout << std::setprecision(4) << bit << "\t\t"
                    << timedInts / (simdpacktime) << "\t\t"
                    << timedInts / (simdpacktimewm) << "\t\t"
                    << timedInts / (simdunpacktime) << "\t\t";
            cout << timedInts / (horizontalunpacktime) << "\t\t";


            cout << endl;
        }

    }

}
/**
 * Benchmarks Helper::unpack and Helper::iunpack for every bit width from 1 to
 * 32 and prints one line of throughput figures per bit width to cout.
 *
 * For each bit width the reference data is packed with Helper::pack, then
 * unpacked with each routine; the result is compared against the reference
 * with equalOnFirstBits(). On the first mismatch a message is printed and the
 * function returns.
 *
 * NOTE(review): `Helper` is not declared inside this block; presumably it is
 * a template parameter declared just above the function -- confirm.
 *
 * @param N number of 32-bit integers in the test array.
 * @param T number of timed passes per bit width. One extra pass (t == 0) is
 *          run first and excluded from the timings as a warm-up.
 * @throws logic_error if needPaddingTo128Bits() reports true for any of the
 *         working buffers.
 */
void simplebenchmark(uint32_t N = 1U << 16, uint32_t T = 1U << 9) {
  T = T + 1; // we have a warming up pass
  uint32_t bogus = 0;
  vector<uint32_t> data(N);
  vector<uint32_t> compressed(N);
  vector<uint32_t> icompressed(N);
  vector<uint32_t> recovered(N);
  WallClockTimer z;
  double unpacktime;
  double iunpacktime;

  // Integers processed over the timed passes. Computed in double because the
  // 32-bit product N * (T - 1) wraps around for large N and T.
  const double timedInts = static_cast<double>(N) * (T - 1);

  cout << "#million of integers per second: higher is better" << endl;
  cout << "#bit,  unpack,iunpack" << endl;

  for (uint32_t bitindex = 0; bitindex < 32; ++bitindex) {
    uint32_t bit = bitindex + 1;
    // Build the reference data: each group of four consecutive entries
    // shares one value, which is the previous group's value plus random(bit).
    // NOTE(review): the condition `4 * i < data.size()` stops at the first
    // quarter of the array, leaving the rest zero; `i < data.size()` may have
    // been intended -- confirm before changing, as it alters the test data.
    vector<uint32_t> initdata(N);
    for (size_t i = 0; 4 * i < data.size(); i += 4) {
      initdata[i] = random(bit) + (i >= 4 ? initdata[i - 4] : 0);
      for (size_t j = 1; j < 4; ++j) {
        initdata[i + j] = initdata[i];
      }
    }

    // Freeze the reference copy and release initdata's storage.
    const vector<uint32_t> refdata = initdata;
    vector<uint32_t>().swap(initdata);

    icompressed.clear();
    // 4 * N should be enough for all  schemes
    icompressed.resize(4 * N, 0);
    compressed.clear();
    // 4 * N should be enough for all  schemes
    compressed.resize(4 * N, 0);
    recovered.clear();
    recovered.resize(N, 0);

    if (needPaddingTo128Bits(recovered.data())) {
      throw logic_error("Array is not aligned on 128 bit boundary!");
    }
    if (needPaddingTo128Bits(icompressed.data())) {
      throw logic_error("Array is not aligned on 128 bit boundary!");
    }
    if (needPaddingTo128Bits(compressed.data())) {
      throw logic_error("Array is not aligned on 128 bit boundary!");
    }
    if (needPaddingTo128Bits(refdata.data())) {
      throw logic_error("Array is not aligned on 128 bit boundary!");
    }

    for (uint32_t repeat = 0; repeat < 1; ++repeat) {

      unpacktime = 0;

      iunpacktime = 0;

      // T already includes the warm-up pass, so run exactly T passes:
      // pass 0 is untimed and passes 1..T-1 are timed, matching the
      // N * (T - 1) numerator used below. (The previous `t <= T` bound timed
      // one pass more than was accounted for, understating throughput.)
      for (uint32_t t = 0; t < T; ++t) {

        assert(data.size() == refdata.size());
        fill(icompressed.begin(), icompressed.end(), 0);
        fill(recovered.begin(), recovered.end(), 0);
        // Restore the input before each pack call.
        memcpy(data.data(), refdata.data(),
               data.size() * sizeof(uint32_t)); // memcpy can be slow
        Helper::pack(data.data(), data.size(), icompressed.data(), bit);
        z.reset();
        Helper::unpack(icompressed.data(), refdata.size(), recovered.data(),
                       bit);
        if (t > 0) // we don't count the first run
          unpacktime += static_cast<double>(z.split());
        if (!equalOnFirstBits(refdata, recovered, bit)) {
          cout << " Bug 1a " << bit << endl;
          return;
        }
        memcpy(data.data(), refdata.data(),
               data.size() * sizeof(uint32_t)); // memcpy can be slow
        Helper::pack(data.data(), data.size(), icompressed.data(), bit);

        z.reset();
        Helper::iunpack(icompressed.data(), refdata.size(), recovered.data(),
                        bit);
        if (t > 0) // we don't count the first run
          iunpacktime += static_cast<double>(z.split());
        if (!equalOnFirstBits(refdata, recovered, bit)) {
          cout << " Bug 2 " << bit << endl;
          return;
        }
      }

      // Columns: bit, unpack throughput, iunpack throughput.
      cout << std::setprecision(4) << bit << "\t\t";
      cout << "\t\t" << timedInts / (unpacktime) << "\t\t";

      cout << "\t\t" << timedInts / (iunpacktime);

      cout << endl;
    }
  }
  cout << "# ignore this " << bogus << endl;
}