예제 #1
0
/**
 * Sorts one run of fixed-width records entirely in memory.
 *
 * Reads the whole of `in` into a buffer, closes `in`, sorts the buffer as
 * consecutive 12-byte records using sort_comparator, and writes the buffer
 * to `out` starting at offset 0.  Trailing bytes that do not make up a full
 * 12-byte record are not sorted; they are written back where they were read.
 *
 * @param out     destination file; written to but not closed here.
 * @param in      source file; read in full and then closed by this function.
 * @param memory  not used by this function; the whole input is loaded
 *                regardless of this value.
 */
void sort_run( indri::file::File& out, indri::file::File& in, size_t memory ) {
  (void) memory;

  // The vector owns the buffer, so it is released even if a read or
  // write below throws.
  const UINT64 length = in.size();
  std::vector<char> data( static_cast<size_t>( length ) );

  // read the data in
  in.read( data.data(), 0, length );
  in.close();

  // An empty vector may hand back a null data() pointer, which qsort must
  // not be given; with no complete record there is nothing to sort anyway.
  const size_t records = static_cast<size_t>( length / 12 );
  if( records > 0 ) {
    qsort( data.data(), records, 12, sort_comparator );
  }

  out.write( data.data(), 0, length );
}
예제 #2
0
/**
 * Converts a text file of "<document> <score>" pairs into binary records.
 *
 * Each pair is read with formatted stream extraction (an int followed by a
 * double) and appended to `outfile` as sizeof(UINT32) bytes of the document
 * number followed by sizeof(double) bytes of the score, in host byte order.
 * Reading stops at the first pair that cannot be parsed completely, which
 * includes end of input and an input file that could not be opened.
 *
 * @param outfile  destination file; flushed and closed by this function.
 * @param infile   path of the text input file.
 */
void convert_intscore_to_long_binary( indri::file::File& outfile, const char* infile ) {
  std::ifstream in;
  in.open( infile );

  {
    // Scoped so the write buffer is destroyed before outfile is closed,
    // matching the original flush / delete / close ordering, and so it is
    // released if a write throws.
    indri::file::SequentialWriteBuffer outb( outfile, 1024*1024 );

    int document;
    double score;

    // Loop on the extraction itself: testing eof() before reading emits a
    // bogus extra record after the last pair, and never terminates when
    // the input stream failed to open.
    while( in >> document >> score ) {
      outb.write( static_cast<const void*>( &document ), sizeof(UINT32) );
      outb.write( static_cast<const void*>( &score ), sizeof(double) );
    }

    outb.flush();
  }

  outfile.close();
  in.close();
}
예제 #3
0
/**
 * Externally sorts `in` into `out` using sorted runs stored in temporary files.
 *
 * The input is consumed in chunks whose size is `memory` rounded down to a
 * multiple of 12 bytes (the record width used by sort_run).  Each chunk is
 * copied to a temporary file, sorted by sort_run into a second temporary
 * file, and the sorted runs are finally combined by merge_sorted_runs.
 * All temporary files created here are removed before returning normally.
 *
 * @param out             destination file, passed on to merge_sorted_runs.
 * @param in              source file; closed by this function once all runs
 *                        have been produced.
 * @param memory          byte budget for one run; values below 12 are
 *                        treated as 12 so that every pass makes progress.
 * @param totalDocuments  forwarded unchanged to merge_sorted_runs.
 */
void sort_file( indri::file::File& out, indri::file::File& in, size_t memory, int totalDocuments ) {
  const size_t recordSize = 12;

  UINT64 length = in.size();

  // Whole records only.  Never let the chunk size fall to zero: a zero-byte
  // chunk would leave `total` unchanged and the loop below would spin
  // forever, creating temporary files on every pass.
  size_t rounded = (memory / recordSize) * recordSize;
  if( rounded == 0 ) {
    rounded = recordSize;
  }

  UINT64 total = 0;
  std::vector<std::string> temporaries;

  while( length > total ) {
    UINT64 chunk = lemur_compat::min<UINT64>( rounded, length - total );
    indri::file::File tempIn;
    indri::file::File tempOut;
    std::string nameIn;
    std::string nameOut;

    tempIn.openTemporary( nameIn );
    tempOut.openTemporary( nameOut );

    // make a sorted run
    copy_region( tempIn, in, total, chunk );
    sort_run( tempOut, tempIn, memory );

    // The unsorted copy is no longer needed; only the sorted run is kept
    // for the merge step.
    tempIn.close();
    tempOut.close();
    lemur_compat::remove( nameIn.c_str() );
    temporaries.push_back( nameOut );

    total += chunk;
  }

  in.close();
  merge_sorted_runs( out, temporaries, totalDocuments );

  // Remove the sorted-run files now that they have been merged.
  for( size_t i=0; i<temporaries.size(); i++ ) {
    lemur_compat::remove( temporaries[i].c_str() );
  }
}