int main() {
  const int N = 100 * 1000 * 1000;
  vector<int> array;
  array.resize(N);
  for(int k = 0; k<N;++k)
    array[k]=k;
  random_shuffle(array.begin(), array.end());
  ZTimer z;
  for(int j = 0; j<10;++j) {
	  int max = INT_MIN;
	  int bestk = 0;
	  for (int k = 0; k<N; ++k) {
		if(array[k]>max) {
			max = array[k];
			bestk = k;
		}
	  }
      cout<<" "<< bestk<<endl;
  }
  cout<<z.split()<<endl;
  z.reset();
  for(int j = 0; j<10;++j) {
	  int max = INT_MIN;
	  for (int k = 0; k<N; ++k) {
		if(array[k]>max) max = array[k];
	  }
	  for (int k = 0; k<N;++k) 
	   if(array[k] == max) {
	        cout<<" "<< k<<endl;
	        break;
	   }
  }
  cout<<z.split()<<endl;
  return 0;
}
// Example #2
// 0
double testSpeed(const T & hasher, const vector<INTEGER> & data, uint64 & answer, const uint mN) {
	ZTimer t;
	double timeelapsed;
	t.reset();
	answer += hasher.hash(&data[0],&data[0]+mN);
	timeelapsed = t.split()/(1000.0);
	return timeelapsed;
}
// Example #3
// 0
double testSpeedManyTimes(const T & hasher, const vector<INTEGER> & data, uint64 & answer, const uint mN,const uint times) {
	ZTimer t;
	double timeelapsed;
	t.reset();
    for(uint k = 0; k<times;++k)
	answer += hasher.hash(&data[0],&data[0]+mN);
	timeelapsed = t.split()/(1000.0);
	return timeelapsed;
}
// Loads the flat file into a RowStore<c>, closes the file, and prints summary
// statistics: load time, row-store size, the ratio of countZeroes() to the
// number of cells (rows * c), the number of attribute values reported by the
// flat file, and c divided by that number.
//
// ff : flat file to load; it is closed by this function but still queried
//      afterwards for numberOfAttributeValues().
void __displayStats(CSVFlatFile & ff) {
	ZTimer timer;
	cout << "#Loading into row store..." << endl;
	RowStore<c> rows(ff, 0);
	ff.close();
	cout << "# " << timer.split() << " ms to load " << rows.size()
			<< " bytes into row store" << endl;
	// The "* 1." / "* 1.0" factors force floating-point division.
	cout << "# fraction of tuples with zeroes = "
			<< rows.countZeroes() * 1. / (rows.data.size() * c) << endl;
	cout << "# number of attribute values = "
			<< ff.numberOfAttributeValues() << endl;
	cout << "# excepted fraction = "
			<< c * 1.0 / ff.numberOfAttributeValues() << endl;
}
// Benchmarks one codec against every column of a column store and appends the
// per-column measurements to v.
//
// mycodec         : codec under test (compress / uncompress / name).
// n               : column store; nothing is measured when n.size() is 0.
// v               : receives one Results entry for this codec.
// smallsetrepeats : how many times each compress and each uncompress call is
//                   repeated inside its timed region.
//
// Every column is processed in chunks of at most 10*1024*1024 positions. For
// each chunk the compression time, the compressed size (out.size() scaled by
// 1/(1024*1024)) and the decompression time are accumulated; the totals of a
// column are then recorded with Results::add().
void testCodec(SimpleCODEC & mycodec, MyNaiveColumnStore & n,
		vector<Results> & v, const uint smallsetrepeats) {
	const uint totalsize = n.size();
	cout << "# computing " << mycodec.name() << " ... " << endl;
	if (totalsize == 0) {
		return;
	}
	Results summary(mycodec.name());
	// Upper bound on the [begin,end) span handed to loadACopy (10,485,760).
	const uint CHUNKLENGTH = 10 * 1024 * 1024;
	for (uint col = 0; col < n.data.size(); ++col) {
		uint compresstotal(0), decompresstotal(0);
		double megabytes(0);
		vector<uint> chunk;
		uint64 begin = 0;
		while (begin < n.data[col].size()) {
			// Clamp the last chunk to the end of the column.
			uint64 end = begin + CHUNKLENGTH;
			if (end > n.data[col].size()) {
				end = n.data[col].size();
			}
			// Loading happens before the timer starts, so it is not measured.
			n.data[col].loadACopy(chunk, begin, end);

			ZTimer timer;
			columntype compressed;
			for (uint rep = 0; rep < smallsetrepeats; ++rep) {
				compressed.clear();
				mycodec.compress(chunk, compressed);
			}
			compresstotal += timer.split();
			megabytes += (compressed.size() * 1.0 / (1024.0 * 1024.0));

			timer.reset();
			for (uint rep = 0; rep < smallsetrepeats; ++rep) {
				// A fresh output container is built on every repetition.
				columntype recovered;
				mycodec.uncompress(compressed, recovered);
			}
			decompresstotal += timer.split();

			begin += CHUNKLENGTH;
		}
		summary.add(megabytes, compresstotal, decompresstotal);
	}
	v.push_back(summary);
}
// Loads the flat file, sorts the rows lexicographically, and then runs the
// test suite on growing prefixes of the table (2,621,440 rows more at each
// step). For every prefix size two layouts are measured:
//   - "lexico": the first k rows of the lexicographically sorted row store;
//   - "multiple list": a copy of the first k rows re-sorted with
//     MultipleListsSortRowsPerBlock using blocks of 131072 rows.
//
// ff                   : flat file to load; closed and cleared here.
// columnorderheuristic : forwarded to
//                        computeColumnOrderAndReturnColumnIndexes().
void __growCSV(CSVFlatFile & ff,  int columnorderheuristic) {
	ZTimer timer;
	cout << "#Loading into row store..." << endl;
	RowStore<c> allrows(ff, 0);
	ff.close();
	cout << "# " << timer.split() << " ms to load " << allrows.size()
			<< " bytes into row store" << endl;
	cout << "# detected " << c << " columns" << endl;
	vector<uint> columnorder =
			ff.computeColumnOrderAndReturnColumnIndexes(columnorderheuristic);
	cout << "# clearing histogram memory..." << endl;
	ff.clear();
	allrows.sortRows(columnorder);

	NaiveColumnStore<c> columns;
	// Prefix sizes grow by 20 blocks of 131072 rows at a time.
	const uint step = 131072 * 20;
	const char * const banner = "#=============================#";
	for (uint prefix = step; prefix <= allrows.data.size(); prefix += step) {
		// Lexicographic order: reuse the already sorted rows directly.
		columns.reloadFromRowStore(allrows, prefix);
		cout << banner << endl;
		cout << "# number of rows = " << prefix << endl;
		cout << banner << endl;
		cout << "# lexico  " << endl;
		runtests(columns, true, true);
		cout << "# got RunCount = " << columns.computeRunCount() << endl;
		columns.clear();

		// Multiple-lists order: work on a temporary copy of the prefix so the
		// full row store keeps its lexicographic order.
		cout << "############################" << endl;
		cout << "# multiple list  " << endl;
		RowStore<c> prefixrows;
		allrows.top(prefix, prefixrows);
		prefixrows.MultipleListsSortRowsPerBlock(columnorder, 131072);
		columns.reloadFromRowStore(prefixrows);
		runtests(columns, true, true);
		prefixrows.clear();
		cout << "# got RunCount = " << columns.computeRunCount() << endl;
		columns.clear();
		cout << endl;
	}
	allrows.clear();
	columns.clear();
}
// Example #7
// 0
int main(int params, char ** args) {
	uint N;
	if (params >= 2) 
	  N = atoi(args[1]);
	else {
	  N = 1024*1024;
	}
    cout<<"#sizeof(uint64)="<<sizeof(uint64)<<endl;
    assert(sizeof(uint64)==8);
	cout<<"# Initializing data..."<<endl;
	vector<uint32>data(N);
	vector<uint64>randombuffer64(N+1);
	//MTRand mt;
	ZRandom zr;
	for(uint k = 0;k<N;++k) {
		data[k] =  zr.getValue();//mt.randInt();
	}
	data.push_back(1);// so that it never ends with a zero
	if( (data.size() & 1) != 0) data.push_back(0);// make sure it is even
	cout<<"# done generating data"<<endl;
	cout<<"# "<<data.size()*sizeof(uint)/(1024.0*1024.0)<<" MB"<<endl;
	ZTimer t;
	for(uint k = 0;k<=N+2;++k) {
		randombuffer64[k] =zr.getValue() & (static_cast<uint64>(zr.getValue())<<32 ) ;
	}
	double timeelapsed = t.split()/(1000.0);
	cout<<"# random generated in "<<timeelapsed<< " or "<<N*sizeof(uint64)/(1024.0*1024*1024*timeelapsed)<<" GB/s"<<endl;
	cout<<"# "<<data.size()*sizeof(uint64)/(1024.0*1024.0)<<" MB"<<endl;
	const uint times = 20;
	uint64 answer = 0;
  	Silly silly;
	Thorup thorup(randombuffer64);
  	StrongMultilinear sm(randombuffer64);
  	PyramidalMultilinear pm(randombuffer64);
  	XAMA xama(randombuffer64);
#if defined (__PCLMUL__) && (__SSE2__)
    CLStrongMultilinear clsm(randombuffer64); 
#endif
  	NoMultiplication testing(randombuffer64);
  	StrongMultilinearTwoByTwo sm2by2(randombuffer64);
    const uint shorttimes =2000000;
	cout<<"# Starting tests... repeating each run "<<shorttimes<<" times"<<endl;
	cout<<"# N silly thorup09(not-strongly-universal) xama strong-multilinear strong-multilinear-2by2 pyramidalmultilinear clmulti*"<<endl;
	for(uint mN = 1024; mN<=2048; mN*=2) {
		vector<double> counter(6,0); 
  	  	counter[0]+=testSpeedManyTimes(silly,data,answer,mN,shorttimes);
	  	counter[1]+=testSpeedManyTimes(thorup,data,answer,mN,shorttimes);
	  	counter[2]+=testSpeedManyTimes(xama,data,answer,mN,shorttimes);
	  	counter[3]+=testSpeedManyTimes(sm,data,answer,mN,shorttimes);
	  	counter[4]+=testSpeedManyTimes(sm2by2,data,answer,mN,shorttimes);
	  	counter[5]+=testSpeedManyTimes(pm,data,answer,mN,shorttimes);
#if defined (__PCLMUL__) && (__SSE2__)
	  	counter[6]+=testSpeedManyTimes(clsm,data,answer,mN,shorttimes);
#endif
        cout<<mN<<" ";
        for(uint k = 0; k<counter.size();++k)
            cout<<counter[k]<<" ";
        cout<<endl;
	}
    cout<<endl;
	cout<<"# Starting tests... repeating each run "<<times<<" times"<<endl;
	cout<<"# N silly thorup09(not-strongly-universal) xama strong-multilinear strong-multilinear-2by2 pyramidalmultilinear clmulti*"<<endl;
	for(uint mN = 1048576; mN<=data.size(); mN*=2) {
		vector<double> counter(6,0); 
		for(uint k = 0;k<times;++k) {
  	  		counter[0]+=testSpeed(silly,data,answer,mN);
	  		counter[1]+=testSpeed(thorup,data,answer,mN);
	  	    counter[2]+=testSpeedManyTimes(xama,data,answer,mN,shorttimes);
	  		counter[3]+=testSpeed(sm,data,answer,mN);
	  		counter[4]+=testSpeed(sm2by2,data,answer,mN);
	  		counter[5]+=testSpeed(pm,data,answer,mN);
#if defined (__PCLMUL__) && (__SSE2__)
	  	counter[6]+=testSpeedManyTimes(clsm,data,answer,mN,shorttimes);
#endif
		}
        cout<<mN<<" ";
        for(uint k = 0; k<counter.size();++k)
            cout<<counter[k]<<" ";
        cout<<endl;
	}
    cout<<endl;

	return answer;	
}
// Loads the flat file into a row store and measures how the column-store
// statistics change with the row ordering:
//   1. lexicographic sort (columns ordered by INCREASINGCARDINALITY);
//   2. MultipleListsSortRowsPerBlock for block sizes 16, 32, 64, ... up to
//      min(8388608, number of rows).
// A third variant (vortex order) is present but disabled.
// For each ordering the rows are reloaded into a fresh NaiveColumnStore, run
// counts are printed and runtests() is invoked.
//
// ff : flat file to load; closed and cleared here.
void __scaleCSV(CSVFlatFile & ff) {
	ZTimer timer;
	cout << "#Loading into row store..." << endl;
	RowStore<c> rows(ff, 0);
	ff.close();
	cout << "# " << timer.split() << " ms to load " << rows.size()
			<< " bytes into row store" << endl;
	cout << "# detected " << c << " columns" << endl;
	vector<uint> columnorder =
			ff.computeColumnOrderAndReturnColumnIndexes(INCREASINGCARDINALITY);
	cout << "# clearing histogram memory..." << endl;
	ff.clear();

	cout << "# sorting..." << endl;
	{
		// Baseline: plain lexicographic order. The scope bounds the lifetime
		// of the column store.
		timer.reset();
		rows.sortRows(columnorder);
		cout << "# " << timer.split() << " ms to sort rows" << endl;
		timer.reset();
		NaiveColumnStore<c> columns;
		columns.reloadFromRowStore(rows);
		cout << "# " << timer.split() << " ms to reload " << columns.size()
				<< " bytes into column store" << endl;
		cout << "# got RunCount = " << columns.computeRunCount() << endl;
		cout << "# got RunCount" << BLOCKSIZE << " = "
				<< columns.computeRunCountp(BLOCKSIZE) << endl;
		cout << "# block size = " << BLOCKSIZE << endl;
		runtests(columns, true, true);
		cout << endl;
	}

	uint rowcount = rows.data.size();
	cout << "# detected " << rowcount << " rows" << endl;
	{
		// Multiple-lists order with doubling block sizes; the row store is
		// re-sorted in place on every iteration.
		for (uint blocksize = 16; blocksize <= min(8388608,rowcount); blocksize *= 2) {
			cout << "# blocksize " << blocksize << " rows" << endl;
			timer.reset();
			rows.MultipleListsSortRowsPerBlock(columnorder, blocksize);
			cout << "# " << timer.split()
					<< " ms to sort rows in multiplelists order with blocksize =  "
					<< blocksize << endl;
			timer.reset();
			NaiveColumnStore<c> columns;
			columns.reloadFromRowStore(rows);
			cout << "# " << timer.split() << " ms to reload " << columns.size()
					<< " bytes into column store" << endl;
			cout << "# got RunCount = " << columns.computeRunCount() << endl;
			cout << "# got RunCount" << BLOCKSIZE << " = "
					<< columns.computeRunCountp(BLOCKSIZE) << endl;
			cout << "# block size = " << BLOCKSIZE << endl;
			runtests(columns, true, true);
			cout << endl;
		}
	}

	// Vortex order: deliberately disabled, kept so that it still compiles.
	if (false) {
		timer.reset();
		rows.vortexSortRows(columnorder);
		cout << "# " << timer.split() << " ms to sort rows in vortex order" << endl;
		timer.reset();
		NaiveColumnStore<c> columns;
		columns.reloadFromRowStore(rows);
		rows.clear();
		cout << "# " << timer.split() << " ms to reload " << columns.size()
				<< " bytes into column store" << endl;
		cout << "# got RunCount = " << columns.computeRunCount() << endl;
		cout << "# got RunCount" << BLOCKSIZE << " = "
				<< columns.computeRunCountp(BLOCKSIZE) << endl;
		cout << "# block size = " << BLOCKSIZE << endl;
		runtests(columns, true, true);
		cout << endl;
	}
}
// Loads the flat file into a row store, optionally samples and/or shuffles
// it, reorders the rows according to `sort`, reloads the result into a column
// store, prints run-count statistics and finally calls runtests().
//
// ff                    : flat file to load; closed and cleared here.
// sort                  : row ordering to apply. LEXICO,
//                         BLOCKWISEMULTIPLELISTS and VORTEX are handled;
//                         MULTIPLELISTS and GRAYCODED only print
//                         "not supported"; any other value shuffles the rows.
// columnorderheuristic  : forwarded to
//                         computeColumnOrderAndReturnColumnIndexes().
// skiprepeats           : forwarded to runtests().
// sample                : when > 0, the row store is replaced by the result
//                         of fillWithSample(sample, ...).
// maxsize               : when > 0, rows.top(maxsize, rows) is applied after
//                         the initial ordering step.
// makeColumnIndependent : when true, the columns are shuffled independently
//                         (through a round trip via the column store) before
//                         any ordering is applied.
void __readCSV(CSVFlatFile & ff, int sort, int columnorderheuristic,
		bool skiprepeats, const uint sample, const uint64 maxsize, const bool makeColumnIndependent) {
	ZTimer timer;
	cout << "#Loading into row store..." << endl;
	RowStore<c> rows(ff, 0);
	ff.close();
	cout << "# " << timer.split() << " ms to load " << rows.size()
			<< " bytes into row store" << endl;

	if (sample > 0) {
		// Replace the loaded rows by a sample, swapping the storage in place.
		timer.reset();
		RowStore<c> sampled;
		rows.fillWithSample(sample, sampled);
		rows.data.swap(sampled.data);
		cout << "# " << timer.split() << " ms to extract sample containing "
				<< sample << " tuples" << endl;
	}

	cout << "# detected " << c << " columns" << endl;
	vector<uint> columnorder =
			ff.computeColumnOrderAndReturnColumnIndexes(columnorderheuristic);
	cout << "# clearing histogram memory..." << endl;
	ff.clear();
	cout << "# sorting..." << endl;

	NaiveColumnStore<c> columns;
	if (makeColumnIndependent) {
		cout << "# shuffling columns independently" << endl;
		cout << "# shuffling columns independently (part 1: loading into column store)" << endl;
		columns.reloadFromRowStore(rows);
		cout << "# shuffling columns independently (part 2: shuffling)" << endl;
		columns.makeColumnsIndependent();
		cout << "# shuffling columns independently (part 3: copying back to row store)" << endl;
		columns.copyToRowStore(rows);
	}

	if (sort == LEXICO) {
		timer.reset();
		rows.sortRows(columnorder);
		cout << "# " << timer.split() << " ms to sort rows" << endl;
		if (maxsize > 0) {
			rows.top(maxsize, rows);
		}
		timer.reset();
		columns.reloadFromRowStore(rows);
		rows.clear();
		cout << "# " << timer.split() << " ms to reload " << columns.size()
				<< " bytes into column store" << endl;
	} else if (sort == MULTIPLELISTS) {
		cout << "not supported" << endl;
	} else if (sort == BLOCKWISEMULTIPLELISTS) {
		// Lexicographic sort first, then the per-block multiple-lists order.
		timer.reset();
		rows.sortRows(columnorder);
		cout << "# " << timer.split() << " ms to sort rows lexicographically"
				<< endl;
		if (maxsize > 0) {
			rows.top(maxsize, rows);
		}
		timer.reset();
		rows.MultipleListsSortRowsPerBlock(columnorder, 131072);
		cout << "# " << timer.split() << " ms to sort rows in multiplelists order"
				<< endl;
		timer.reset();
		columns.reloadFromRowStore(rows);
		rows.clear();
		cout << "# " << timer.split() << " ms to reload " << columns.size()
				<< " bytes into column store" << endl;
	} else if (sort == VORTEX) {
		timer.reset();
		rows.vortexSortRows(columnorder);
		cout << "# " << timer.split() << " ms to sort rows in vortex order" << endl;
		if (maxsize > 0) {
			rows.top(maxsize, rows);
		}
		timer.reset();
		columns.reloadFromRowStore(rows);
		rows.clear();
		cout << "# " << timer.split() << " ms to reload " << columns.size()
				<< " bytes into column store" << endl;
	} else if (sort == GRAYCODED) {
		cerr << "not supported" << endl;
	} else {
		// Any other value: random row order.
		timer.reset();
		rows.shuffleRows();
		cout << "# " << timer.split() << " ms to shuffle rows" << endl;
		if (maxsize > 0) {
			rows.top(maxsize, rows);
		}
		timer.reset();
		columns.reloadFromRowStore(rows);
		rows.clear();
		cout << "# " << timer.split() << " ms to reload " << columns.size()
				<< " bytes into column store" << endl;
	}

	cout << "# got RunCount = " << columns.computeRunCount() << endl;
	cout << "# got RunCount" << BLOCKSIZE << " = "
			<< columns.computeRunCountp(BLOCKSIZE) << endl;
	cout << "# block size = " << BLOCKSIZE << endl;
	runtests(columns, skiprepeats);
	columns.clear();
}