//
// sort_run
//
// Loads the whole of <in> into memory, sorts it in place as an array of
// fixed-size 12-byte records using sort_comparator, and writes the result
// to <out> starting at offset 0.  <in> is closed once it has been read;
// <out> is left open for the caller.
//
// The <memory> argument is not consulted here: the entire file is buffered
// regardless of its size, so the caller must size the input accordingly.
// Any trailing bytes that do not form a whole record are written back
// unsorted, because only length / 12 records are handed to qsort.
//
void sort_run( indri::file::File& out, indri::file::File& in, size_t memory ) {
  const size_t recordSize = 12;

  // read the data in
  UINT64 length = in.size();

  if( length == 0 ) {
    // nothing to sort or write; still honor the contract of closing the input
    in.close();
    return;
  }

  // The vector owns the buffer, so it is released on every exit path,
  // including an exception escaping from read(), qsort() or write().
  std::vector<char> buffer( static_cast<size_t>( length ) );
  char* data = &buffer[0];

  in.read( data, 0, length );
  in.close();

  qsort( data, static_cast<size_t>( length / recordSize ), recordSize, sort_comparator );
  out.write( data, 0, length );
}
//
// convert_intscore_to_long_binary
//
// Reads whitespace-separated (document, score) pairs from the text file
// named by <infile> and writes each pair to <outfile> as a binary record:
// sizeof(UINT32) bytes taken from the int document number, followed by
// sizeof(double) bytes of the score.  <outfile> is closed before returning.
//
// Conversion stops at the first pair that cannot be read in full, which
// covers end of file, a malformed token, and an input file that could not
// be opened (in that case no records are written).
//
void convert_intscore_to_long_binary( indri::file::File& outfile, const char* infile ) {
  std::ifstream in;
  in.open( infile );

  {
    // Scoped so the write buffer is destroyed before outfile is closed,
    // and is released even if a write throws.
    indri::file::SequentialWriteBuffer outb( outfile, 1024*1024 );

    int document;
    double score;

    // Test the extraction itself rather than eof(): eof() only becomes true
    // after a read has already failed, which would emit a bogus trailing
    // record, and it never becomes true when the stream is in a failed
    // state (unopened file, unparsable token), which would loop forever.
    while( in >> document >> score ) {
      outb.write( static_cast<const void*>( &document ), sizeof(UINT32) );
      outb.write( static_cast<const void*>( &score ), sizeof(double) );
    }

    outb.flush();
  }

  outfile.close();
  in.close();
}
//
// sort_file
//
// Sorts the contents of <in> into <out> in two phases:
//   1. <in> is cut into chunks of at most <memory> bytes (rounded down to a
//      whole number of 12-byte records); each chunk is copied to a temporary
//      file, sorted by sort_run, and kept as a sorted run in another
//      temporary file.
//   2. The sorted runs are handed to merge_sorted_runs together with <out>
//      and <totalDocuments>, after which the run files are removed.
//
// <in> is closed before the merge starts.  If <memory> is smaller than one
// record, a chunk size of exactly one record is used instead.
//
void sort_file( indri::file::File& out, indri::file::File& in, size_t memory, int totalDocuments ) {
  const size_t recordSize = 12;

  UINT64 length = in.size();
  size_t rounded = (memory / recordSize) * recordSize;

  // With memory < recordSize the rounded size would be zero: every chunk
  // would then be empty, total would never advance, and the loop below
  // would spin forever creating temporary files.
  if( rounded == 0 ) {
    rounded = recordSize;
  }

  UINT64 total = 0;
  std::vector<std::string> temporaries;

  while( length > total ) {
    UINT64 chunk = lemur_compat::min<UINT64>( rounded, length - total );

    indri::file::File tempIn;
    indri::file::File tempOut;
    std::string nameIn;
    std::string nameOut;

    tempIn.openTemporary( nameIn );
    tempOut.openTemporary( nameOut );

    // make a sorted run
    copy_region( tempIn, in, total, chunk );
    sort_run( tempOut, tempIn, memory );

    tempIn.close();
    tempOut.close();

    // the unsorted copy is no longer needed; the sorted run is kept for the merge
    lemur_compat::remove( nameIn.c_str() );
    temporaries.push_back( nameOut );

    total += chunk;
  }

  in.close();
  merge_sorted_runs( out, temporaries, totalDocuments );

  // clean up the sorted run files now that they have been merged
  for( size_t i=0; i<temporaries.size(); i++ ) {
    lemur_compat::remove( temporaries[i].c_str() );
  }
}