/**
 * Micro-benchmark: finding the position of the largest value in a shuffled
 * array of N integers.
 *
 * The array is filled with 0..N-1 and shuffled, then two strategies are each
 * run 10 times:
 *   1. a single pass that tracks the running maximum together with its index;
 *   2. a pass that only tracks the maximum value, followed by a second pass
 *      that stops at the first position holding that value.
 * Every repetition prints the index it found. After each strategy's
 * repetitions, the value returned by ZTimer::split() is printed.
 */
int main() {
  const int N = 100 * 1000 * 1000;
  const int REPEATS = 10;

  // Build the input: the values 0..N-1 in random order.
  vector<int> array(N);
  for (int i = 0; i < N; ++i) {
    array[i] = i;
  }
  random_shuffle(array.begin(), array.end());

  ZTimer z;

  // Strategy 1: maximum and its index in one pass.
  for (int rep = 0; rep < REPEATS; ++rep) {
    int largest = INT_MIN;
    int where = 0;
    for (int i = 0; i < N; ++i) {
      if (array[i] > largest) {
        largest = array[i];
        where = i;
      }
    }
    cout << " " << where << endl;
  }
  cout << z.split() << endl;
  z.reset();

  // Strategy 2: maximum value first, then look up where it sits.
  for (int rep = 0; rep < REPEATS; ++rep) {
    int largest = INT_MIN;
    for (int i = 0; i < N; ++i) {
      if (array[i] > largest) largest = array[i];
    }
    int i = 0;
    while (i < N && array[i] != largest) {
      ++i;
    }
    if (i < N) {
      cout << " " << i << endl;
    }
  }
  cout << z.split() << endl;
  return 0;
}
// Example #2
// 0
double testSpeed(const T & hasher, const vector<INTEGER> & data, uint64 & answer, const uint mN) {
	ZTimer t;
	double timeelapsed;
	t.reset();
	answer += hasher.hash(&data[0],&data[0]+mN);
	timeelapsed = t.split()/(1000.0);
	return timeelapsed;
}
// Example #3
// 0
double testSpeedManyTimes(const T & hasher, const vector<INTEGER> & data, uint64 & answer, const uint mN,const uint times) {
	ZTimer t;
	double timeelapsed;
	t.reset();
    for(uint k = 0; k<times;++k)
	answer += hasher.hash(&data[0],&data[0]+mN);
	timeelapsed = t.split()/(1000.0);
	return timeelapsed;
}
/**
 * Benchmarks one codec against every column of a column store.
 *
 * Each column is processed in chunks of at most MAXSIZE positions. For every
 * chunk, compression and then decompression are each repeated
 * smallsetrepeats times and timed with a ZTimer. Per column, the summed
 * compressed size (out.size() scaled by 1/(1024*1024)) and the two summed
 * timings are recorded with Results::add(). The finished Results object is
 * appended to v.
 *
 * @param mycodec          codec under test (name(), compress(), uncompress())
 * @param n                column store whose columns are read chunk by chunk
 * @param v                receives one Results entry, unless n.size() is 0
 * @param smallsetrepeats  repetitions of each timed compress/uncompress step
 */
void testCodec(SimpleCODEC & mycodec, MyNaiveColumnStore & n,
		vector<Results> & v, const uint smallsetrepeats) {
	const uint uncompressedsize = n.size();
	cout << "# computing " << mycodec.name() << " ... " << endl;
	if (uncompressedsize == 0) {
		// Nothing to measure: v is left untouched.
		return;
	}

	// Upper bound on the number of column positions handled per chunk.
	const uint MAXSIZE = 10 * 1024 * 1024;

	Results summary(mycodec.name());
	for (uint col = 0; col < n.data.size(); ++col) {
		uint compresstotal = 0;
		uint decompresstotal = 0;
		double megabytes = 0;
		vector<uint> chunk;

		uint64 from = 0;
		while (from < n.data[col].size()) {
			// Clamp the chunk so it never runs past the end of the column.
			const uint64 columnlength = n.data[col].size();
			uint64 to = from + MAXSIZE;
			if (to > columnlength) {
				to = columnlength;
			}
			n.data[col].loadACopy(chunk, from, to);

			// Timed compression; only the output of the last repetition is kept.
			ZTimer z;
			columntype out;
			for (uint left = smallsetrepeats; left > 0; --left) {
				out.clear();
				mycodec.compress(chunk, out);
			}
			compresstotal += z.split();
			megabytes += static_cast<double>(out.size()) / (1024.0 * 1024.0);

			// Timed decompression into a fresh container on every repetition.
			z.reset();
			for (uint left = smallsetrepeats; left > 0; --left) {
				columntype recovered;
				mycodec.uncompress(out, recovered);
			}
			decompresstotal += z.split();

			from += MAXSIZE;
		}
		summary.add(megabytes, compresstotal, decompresstotal);
	}
	v.push_back(summary);
}
/**
 * Loads a CSV file into a row store and runs the codec tests under several
 * row orderings, reporting timings and run counts on stdout.
 *
 * Phases:
 *   1. load ff into a RowStore<c>, close ff, and compute the column order
 *      with the INCREASINGCARDINALITY heuristic;
 *   2. sort the rows with sortRows(), reload into a column store, run tests;
 *   3. for block sizes 16, 32, 64, ... up to min(8388608, number of rows),
 *      reorder with MultipleListsSortRowsPerBlock(), reload, run tests;
 *   4. a vortex-order phase that is currently disabled (if(false)).
 *
 * `c` is not declared inside this block; presumably it is a template
 * parameter (the column count) declared with the enclosing template header.
 *
 * @param ff  CSV source; it is closed after loading and clear()ed once the
 *            column order has been computed.
 */
void __scaleCSV(CSVFlatFile & ff) {
	ZTimer z;
	cout<<"#Loading into row store..."<<endl;
	RowStore<c> rs(ff,0);
	ff.close();
	cout << "# " << z.split() << " ms to load " << rs.size()
			<< " bytes into row store" << endl;
	cout << "# detected " << c << " columns" << endl;
	// ff is already closed here; the column order is computed from state ff
	// still holds (the log line below calls it "histogram memory").
	vector<uint> indexes = ff.computeColumnOrderAndReturnColumnIndexes(
			INCREASINGCARDINALITY);
	cout<<"# clearing histogram memory..."<<endl;
	ff.clear();

	cout<<"# sorting..."<<endl;
	// Phase 2: sortRows() ordering.
	if (true) {
		z.reset();
		rs.sortRows(indexes);
		cout << "# " << z.split() << " ms to sort rows" << endl;
		z.reset();
		NaiveColumnStore<c> ncs;
		ncs.reloadFromRowStore(rs);
		// rs is deliberately NOT cleared: the following phases reuse it.
		cout << "# " << z.split() << " ms to reload " << ncs.size()
				<< " bytes into column store" << endl;
		cout << "# got RunCount = " << ncs.computeRunCount() << endl;
		cout << "# got RunCount" << BLOCKSIZE << " = " << ncs.computeRunCountp(
				BLOCKSIZE) << endl;
		cout << "# block size = " << BLOCKSIZE << endl;
		runtests(ncs, true,true);
		cout<<endl;
	}
	const uint numberofrows = rs.data.size();
	cout << "# detected " << numberofrows << " rows" << endl;
	// Phase 3: multiple-lists ordering for doubling block sizes.
	if (true) {
		// Largest block size to try. Computed with an explicit unsigned
		// comparison: min(8388608, numberofrows) mixes int and uint, which
		// std::min cannot deduce.
		const uint maxblocksize = numberofrows < 8388608u ? numberofrows : 8388608u;
		for (uint blocksize = 16; blocksize <= maxblocksize; blocksize *= 2) {
			cout << "# blocksize " << blocksize << " rows" << endl;
			z.reset();
			rs.MultipleListsSortRowsPerBlock(indexes, blocksize);
			cout << "# " << z.split()
					<< " ms to sort rows in multiplelists order with blocksize =  "
					<< blocksize << endl;
			z.reset();
			NaiveColumnStore<c> ncs;
			ncs.reloadFromRowStore(rs);
			// rs is kept: the next block size reorders the same rows again.
			cout << "# " << z.split() << " ms to reload " << ncs.size()
					<< " bytes into column store" << endl;
			cout << "# got RunCount = " << ncs.computeRunCount() << endl;
			cout << "# got RunCount" << BLOCKSIZE << " = "
					<< ncs.computeRunCountp(BLOCKSIZE) << endl;
			cout << "# block size = " << BLOCKSIZE << endl;
			runtests(ncs, true,true);
			cout << endl;
		}
	}
	// Phase 4: vortex ordering. Disabled; kept so it can be switched back on.
	if(false) {
		z.reset();
		rs.vortexSortRows(indexes);
		cout << "# " << z.split() << " ms to sort rows in vortex order" << endl;
		z.reset();
		NaiveColumnStore<c> ncs;
		ncs.reloadFromRowStore(rs);
		rs.clear();
		cout << "# " << z.split() << " ms to reload " << ncs.size()
			<< " bytes into column store" << endl;
		cout << "# got RunCount = " << ncs.computeRunCount() << endl;
		cout << "# got RunCount" << BLOCKSIZE << " = " << ncs.computeRunCountp(
				BLOCKSIZE) << endl;
		cout << "# block size = " << BLOCKSIZE << endl;
		runtests(ncs, true,true);
		cout<<endl;
	}
}
/**
 * Loads a CSV file into a row store, optionally samples it and decorrelates
 * its columns, applies the requested row ordering, converts the result to a
 * column store and runs the codec tests on it. Progress and timings go to
 * stdout as "#"-prefixed lines; "not supported" diagnostics go to stderr.
 *
 * `c` is not declared inside this block; presumably it is a template
 * parameter (the column count) declared with the enclosing template header.
 *
 * @param ff                     CSV source; closed after loading and
 *                               clear()ed once the column order is computed
 * @param sort                   row ordering: LEXICO, BLOCKWISEMULTIPLELISTS
 *                               or VORTEX; MULTIPLELISTS and GRAYCODED are
 *                               reported as not supported; any other value
 *                               shuffles the rows
 * @param columnorderheuristic   forwarded to
 *                               computeColumnOrderAndReturnColumnIndexes()
 * @param skiprepeats            forwarded to runtests()
 * @param sample                 when > 0, the rows are replaced by the result
 *                               of fillWithSample(sample, ...)
 * @param maxsize                when > 0, rs.top(maxsize, rs) is applied
 *                               after the first ordering step (presumably
 *                               keeping only the first maxsize rows -- TODO
 *                               confirm against RowStore::top)
 * @param makeColumnIndependent  when true, the columns are shuffled
 *                               independently through a column-store round
 *                               trip before any ordering is applied
 */
void __readCSV(CSVFlatFile & ff, int sort, int columnorderheuristic,
		bool skiprepeats, const uint sample, const uint64 maxsize, const bool makeColumnIndependent) {
	ZTimer z;
	cout<<"#Loading into row store..."<<endl;
	RowStore<c> rs(ff,0);
	ff.close();
	cout << "# " << z.split() << " ms to load " << rs.size()
			<< " bytes into row store" << endl;
	if(sample>0) {
		// Replace the loaded rows with the sample by swapping storage.
		z.reset();
		RowStore<c> rstmp;
		rs.fillWithSample(sample,rstmp);
		rs.data.swap(rstmp.data);
		cout << "# " << z.split() << " ms to extract sample containing "<< sample<<" tuples" << endl;
	}
	cout << "# detected " << c << " columns" << endl;
	// ff is already closed here; the column order is computed from state ff
	// still holds (the log line below calls it "histogram memory").
	vector<uint> indexes = ff.computeColumnOrderAndReturnColumnIndexes(
			columnorderheuristic);
	cout<<"# clearing histogram memory..."<<endl;
	ff.clear();
	cout<<"# sorting..."<<endl;
	NaiveColumnStore<c> ncs;
	if(makeColumnIndependent) {
		// Round trip: row store -> column store -> shuffle -> row store.
		cout<<"# shuffling columns independently"<<endl;
		cout<<"# shuffling columns independently (part 1: loading into column store)"<<endl;
		ncs.reloadFromRowStore(rs);
		cout<<"# shuffling columns independently (part 2: shuffling)"<<endl;
		ncs.makeColumnsIndependent();
		cout<<"# shuffling columns independently (part 3: copying back to row store)"<<endl;
		ncs.copyToRowStore(rs);
	}
	if (sort == LEXICO) {
		z.reset();
		rs.sortRows(indexes);
		cout << "# " << z.split() << " ms to sort rows" << endl;
		if(maxsize>0) rs.top(maxsize,rs);
		z.reset();
		ncs.reloadFromRowStore(rs);
		rs.clear();
		cout << "# " << z.split() << " ms to reload " << ncs.size()
				<< " bytes into column store" << endl;
	} else if (sort == MULTIPLELISTS) {
		// Diagnostic goes to stderr, like the GRAYCODED branch, so it does
		// not mix with the "#"-prefixed report lines on stdout.
		cerr << "not supported" << endl;
	} else if (sort == BLOCKWISEMULTIPLELISTS) {
		// Lexicographic sort first, then the block-wise multiple-lists
		// reordering; maxsize is applied between the two steps.
		z.reset();
		rs.sortRows(indexes);
		cout << "# " << z.split() << " ms to sort rows lexicographically"
				<< endl;
		if(maxsize>0) rs.top(maxsize,rs);
		z.reset();
		rs.MultipleListsSortRowsPerBlock(indexes, 131072);
		cout << "# " << z.split() << " ms to sort rows in multiplelists order"
				<< endl;
		z.reset();
		ncs.reloadFromRowStore(rs);
		rs.clear();
		cout << "# " << z.split() << " ms to reload " << ncs.size()
				<< " bytes into column store" << endl;
	} else if (sort == VORTEX) {
		z.reset();
		rs.vortexSortRows(indexes);
		cout << "# " << z.split() << " ms to sort rows in vortex order" << endl;
		if(maxsize>0) rs.top(maxsize,rs);
		z.reset();
		ncs.reloadFromRowStore(rs);
		rs.clear();
		cout << "# " << z.split() << " ms to reload " << ncs.size()
				<< " bytes into column store" << endl;
	} else if (sort == GRAYCODED) {
		cerr << "not supported" << endl;
	} else {
		// Any other value of sort: random row order.
		z.reset();
		rs.shuffleRows();
		cout << "# " << z.split() << " ms to shuffle rows" << endl;
		if(maxsize>0) rs.top(maxsize,rs);
		z.reset();
		ncs.reloadFromRowStore(rs);
		rs.clear();
		cout << "# " << z.split() << " ms to reload " << ncs.size()
				<< " bytes into column store" << endl;
	}
	// NOTE(review): the two "not supported" branches fall through to here, so
	// the statistics and tests below run on whatever ncs holds at that point
	// (nothing reloaded by this chain). Kept as in the original -- confirm
	// whether an early return is wanted.
	cout << "# got RunCount = " << ncs.computeRunCount() << endl;
	cout << "# got RunCount" << BLOCKSIZE << " = " << ncs.computeRunCountp(
			BLOCKSIZE) << endl;
	cout << "# block size = " << BLOCKSIZE << endl;
	runtests(ncs, skiprepeats);
	ncs.clear();
}