Ejemplo n.º 1
0
/*
 * Process reads using multiple threads.
 *
 * Starts `threads` alignment worker threads plus one output writer thread,
 * all sharing a single MultithreadData instance. The calling thread then
 * reads every input file (fetching it through the S3 command first when
 * using_s3 is set) and posts each record to the workers. Once all input is
 * consumed, one NULL record per worker is posted, the workers are joined,
 * and finally a NULL output record is posted and the writer is joined.
 *
 * files1: input files (one per iteration).
 * files2: second files, indexed in parallel with files1; only read when
 *         paired && !bam.
 *
 * On return run_info.num_processed_units holds the number of records that
 * were read and posted to the workers.
 */
void multi_thread_process_loop(vector<string> files1,
                               vector<string> files2) {
  MultithreadData mtdata(threads);
  list<pthread_t> satellite_threads;
  pthread_t writer_thread;
  if (files1.empty()) return;  // no input: do not start any thread

  // Start the alignment workers; every worker receives the shared mtdata.
  for (size_t i = 0; i < threads; ++i) {
    pthread_t id;
    if (pthread_create(&id, NULL, satellite_process_consumer_thread,
                       reinterpret_cast<void*>(&mtdata))) {
      PrintMessageDieOnError("Failed to create threads", ERROR);
    }
    satellite_threads.push_back(id);
  }

  // Start the single output writer thread.
  if (pthread_create(&writer_thread, NULL, output_writer_thread,
                     reinterpret_cast<void*>(&mtdata))) {
    PrintMessageDieOnError("Failed to create output writer threads", ERROR);
  }

  // 1-based index of the next record to be fetched. It is stamped on a
  // record before the reader fills it, so it is always one ahead of the
  // number of records actually posted.
  size_t counter = 1;
  std::string file1;
  std::string file2;
  for (size_t i = 0; i < files1.size(); i++) {
    file1 = files1.at(i);
    if (paired && !bam) {
      // Two input files per iteration.
      file2 = files2.at(i);
      PrintMessageDieOnError("Processing files " + file1 + " and " + file2, PROGRESS);
      if (using_s3) {
        const std::string s3cmd1 = GenerateS3Command(s3bucket,
                                                     file1,
                                                     s3cmd_configfile);
        const std::string s3cmd2 = GenerateS3Command(s3bucket,
                                                     file2,
                                                     s3cmd_configfile);
        if (s3debug) {
          // Debug mode only prints the commands; nothing is fetched.
          PrintMessageDieOnError("S3 debug: " + s3cmd1, PROGRESS);
          PrintMessageDieOnError("S3 debug: " + s3cmd2, PROGRESS);
        } else {
          if (system(s3cmd1.c_str()) != 0) {
            PrintMessageDieOnError("Problem fetching file1 from S3", ERROR);
          }
          if (system(s3cmd2.c_str()) != 0) {
            PrintMessageDieOnError("Problem fetching file2 from S3", ERROR);
          }
        }
        // From here on the files are addressed under /mnt/lobstr/.
        file1 = "/mnt/lobstr/"+file1;
        file2 = "/mnt/lobstr/"+file2;
      }
      if (!(fexists(file1.c_str()) && fexists(file2.c_str()))) {
        PrintMessageDieOnError("File " + file1 + " or " + file2 + " does not exist", WARNING);
        continue;
      }
    } else {
      // One input file per iteration; file2 is intentionally left untouched
      // (it is never assigned on this path).
      PrintMessageDieOnError("Processing file " + file1, PROGRESS);
      if (using_s3) {
        const std::string s3cmd = GenerateS3Command(s3bucket,
                                                    file1,
                                                    s3cmd_configfile);
        if (s3debug) {
          PrintMessageDieOnError("S3 debug: " + s3cmd, PROGRESS);
        } else {
          if (system(s3cmd.c_str()) != 0) {
            PrintMessageDieOnError("Problem fetching file from S3", ERROR);
          }
        }
        file1 = "/mnt/lobstr/"+file1;
      }
      if (!fexists(file1.c_str())) {
        PrintMessageDieOnError("File " + file1 + " does not exist", WARNING);
        continue;
      }
    }

    IFileReader *pReader = create_file_reader(file1, file2);
    for (;;) {
      ReadPair *pRecord = new ReadPair;
      pRecord->read_count = counter;
      if (!pReader->GetNextRecord(pRecord)) {
        // No more reads in this file: the unused record was never handed
        // to a consumer, so it must be released here.
        delete pRecord;
        break;
      }
      // Report progress only once the read is known to exist, so the count
      // is never ahead of the input and is not repeated at a file boundary.
      if (counter % READPROGRESS == 0) {
        stringstream msg;
        msg << "Processed " << counter << ' ' << unit_name;
        PrintMessageDieOnError(msg.str(), PROGRESS);
      }
      counter++;
      mtdata.increment_input_counter();
      mtdata.post_new_input_read(pRecord);
      pRecord = NULL;  // the consumers will take it from here, and free it
    }
    delete pReader;

    if (using_s3) {
      // Remove the local copies of the fetched files.
      // NOTE(review): the file names are interpolated into a command line
      // handed to system(); names containing shell metacharacters would be
      // interpreted by the shell.
      string rmcmd = "rm " + file1;
      if (paired && !bam) {
        rmcmd += "; rm ";
        rmcmd += file2;
      }
      if (s3debug) {
        PrintMessageDieOnError("S3 debug: " + rmcmd, PROGRESS);
      } else {
        if (system(rmcmd.c_str()) != 0) {
          PrintMessageDieOnError("Problem deleting file", ERROR);
        }
      }
    }
  }
  // counter is the index of the next (never fetched) record.
  run_info.num_processed_units = counter - 1;

#ifdef DEBUG_THREADS
  PrintMessageDieOnError("No more input, waiting for alignment threads completion", PROGRESS);
#endif
  // Send a 'poison pill' to the alignment threads: one NULL per worker.
  for (size_t i = 0; i < threads; ++i)
    mtdata.post_new_input_read(NULL);

  for (list<pthread_t>::const_iterator it = satellite_threads.begin();
          it != satellite_threads.end(); ++it) {
    const int rc = pthread_join(*it, NULL);
    if (rc != 0) {
       stringstream msg;
       msg << "Failed to join alignment thread " << (*it) <<
              ", error code = " << rc;
       PrintMessageDieOnError(msg.str(), WARNING);
    }
  }

  // Send a 'poison pill' to the writer thread, only after the workers have
  // been joined.
#ifdef DEBUG_THREADS
  PrintMessageDieOnError("waiting for writer thread completion", PROGRESS);
#endif
  mtdata.post_new_output_read(NULL);
  const int writer_rc = pthread_join(writer_thread, NULL);
  if (writer_rc != 0) {
    stringstream msg;
    msg << "Failed to join writer thread " << (writer_thread) <<
           ", error code = " << writer_rc;
    PrintMessageDieOnError(msg.str(), WARNING);
  }
#ifdef DEBUG_THREADS
  PrintMessageDieOnError("All thread terminated.", PROGRESS);
#endif

}
Ejemplo n.º 2
0
/*
 * Process reads in a single thread.
 *
 * For every input file (fetched through the S3 command first when using_s3
 * is set) each record is read, length-filtered, run through the STR
 * detector and then the aligner. If the first alignment attempt fails, a
 * second attempt is made using each read's next-best repeat period.
 * Aligned records are written to "<output_prefix>.aligned.bam".
 *
 * files1: input files (one per iteration).
 * files2: second files, indexed in parallel with files1; only read when
 *         paired && !bam.
 *
 * On return run_info.num_processed_units holds the number of records read,
 * including those skipped by the length filter or by detection.
 */
void single_thread_process_loop(const vector<string>& files1,
                                const vector<string>& files2) {
  ReadPair read_pair;
  SamFileWriter samWriter(output_prefix + ".aligned.bam", chrom_sizes);
  STRDetector *pDetector = new STRDetector();
  BWAReadAligner *pAligner = new BWAReadAligner(&bwt_references,
                                                &bnt_annotations,
                                                &ref_sequences, opts);
  std::string file1;
  std::string file2;
  size_t num_reads_processed = 0;
  for (size_t i = 0; i < files1.size(); i++) {
    file1 = files1.at(i);
    if (paired && !bam) {
      // Two input files per iteration.
      file2 = files2.at(i);
      PrintMessageDieOnError("Processing files " + file1 + " and " + file2, PROGRESS);
      if (using_s3) {
        const std::string s3cmd1 = GenerateS3Command(s3bucket,
                                                     file1,
                                                     s3cmd_configfile);
        const std::string s3cmd2 = GenerateS3Command(s3bucket,
                                                     file2,
                                                     s3cmd_configfile);
        if (s3debug) {
          // Debug mode only prints the commands; nothing is fetched.
          PrintMessageDieOnError("S3 debug: " + s3cmd1, PROGRESS);
          PrintMessageDieOnError("S3 debug: " + s3cmd2, PROGRESS);
        } else {
          if (system(s3cmd1.c_str()) != 0) {
            PrintMessageDieOnError("Problem fetching file1 from S3", ERROR);
          }
          if (system(s3cmd2.c_str()) != 0) {
            PrintMessageDieOnError("Problem fetching file2 from S3", ERROR);
          }
        }
        // From here on the files are addressed under /mnt/lobstr/.
        file1 = "/mnt/lobstr/"+file1;
        file2 = "/mnt/lobstr/"+file2;
      }
      if (!(fexists(file1.c_str()) && fexists(file2.c_str()))) {
        PrintMessageDieOnError("File " + file1 + " or " + file2 + " does not exist", WARNING);
        continue;
      }
    } else {
      // One input file per iteration.
      PrintMessageDieOnError("Processing file " + file1, PROGRESS);
      if (using_s3) {
        const std::string s3cmd = GenerateS3Command(s3bucket,
                                                    file1,
                                                    s3cmd_configfile);
        if (s3debug) {
          PrintMessageDieOnError("S3 debug: " + s3cmd, PROGRESS);
        } else {
          if (system(s3cmd.c_str()) != 0) {
            PrintMessageDieOnError("Problem fetching file from S3", ERROR);
          }
        }
        file1 = "/mnt/lobstr/"+file1;
      }
      if (!fexists(file1.c_str())) {
        PrintMessageDieOnError("File " + file1 + " does not exist", WARNING);
        continue;
      }
    }
    IFileReader* pReader = create_file_reader(file1, file2);
    bool aligned = false;
    std::string repseq = "";
    while (pReader->GetNextRecord(&read_pair)) {
      aligned = false;
      num_reads_processed += 1;
      if (num_reads_processed % READPROGRESS == 0) {
        stringstream msg;
        msg << "Processed " << num_reads_processed << ' ' << unit_name;
        PrintMessageDieOnError(msg.str(), PROGRESS);
      }
      read_pair.read_count = num_reads_processed;
      // Reset fields: the same read_pair object is reused for every record,
      // so detection results from the previous record are cleared here.
      read_pair.reads.at(0).repseq = "";
      read_pair.reads.at(0).ms_repeat_best_period = 0;
      read_pair.reads.at(0).ms_repeat_next_best_period = 0;
      if (read_pair.reads.at(0).paired) {
        read_pair.reads.at(1).repseq = "";
        read_pair.reads.at(1).ms_repeat_best_period = 0;
        read_pair.reads.at(1).ms_repeat_next_best_period = 0;
      }

      // Check read length: skip the record when any of its reads falls
      // outside [min_read_length, max_read_length]. The test is spelled as
      // two plain comparisons so that both bounds are enforced.
      const size_t read1_length = read_pair.reads.at(0).nucleotides.length();
      if (read1_length < min_read_length || read1_length > max_read_length) {
        continue;
      }
      if (read_pair.reads.at(0).paired) {
        const size_t read2_length = read_pair.reads.at(1).nucleotides.length();
        if (read2_length < min_read_length || read2_length > max_read_length) {
          continue;
        }
      }
      bases += read_pair.reads.at(0).nucleotides.length();
      if (read_pair.reads.at(0).paired) bases += read_pair.reads.at(1).nucleotides.length();

      // STEP 1: Sensing
      string det_err, det_messages;
      if (!pDetector->ProcessReadPair(&read_pair, &det_err, &det_messages)) {
        if (debug) {
          PrintMessageDieOnError(GetReadDebug(read_pair, det_err, det_messages, "NA", "NA") + " (detection-fail)", DEBUG);
        }
        continue;
      }
      // STEP 2: Alignment
      string aln_err, aln_messages;
      if (pAligner->ProcessReadPair(&read_pair, &aln_err, &aln_messages)) {
        aligned = true;
        if (debug) { // if aligned, what was the repseq we aligned to
          PrintMessageDieOnError(GetReadDebug(read_pair, det_err, det_messages, aln_err, aln_messages)+ " (aligned-round-1)", DEBUG);
        }
      } else {
        // First attempt failed. A read only counts as having passed
        // detection again if its next-best period yields a repeat sequence.
        read_pair.read1_passed_detection = false;
        read_pair.read2_passed_detection = false;
        // Try second best period for each read
        if (read_pair.reads.at(0).ms_repeat_next_best_period != 0) {
          read_pair.reads.at(0).ms_repeat_best_period =
            read_pair.reads.at(0).ms_repeat_next_best_period;
          string err, second_best_repseq;
          if (getMSSeq(read_pair.reads.at(0).detected_ms_region_nuc,
                       read_pair.reads.at(0).ms_repeat_best_period, &repseq, &second_best_repseq, &err)) {
            read_pair.reads.at(0).repseq = repseq;
            read_pair.read1_passed_detection = true;
          }
        }
        if (read_pair.reads.at(0).paired) {
          if (read_pair.reads.at(1).ms_repeat_next_best_period != 0) {
            read_pair.reads.at(1).ms_repeat_best_period =
              read_pair.reads.at(1).ms_repeat_next_best_period;
            string err, second_best_repseq;
            if (getMSSeq(read_pair.reads.at(1).detected_ms_region_nuc,
                         read_pair.reads.at(1).ms_repeat_best_period,
                         &repseq, &second_best_repseq, &err)) {
              read_pair.reads.at(1).repseq = repseq;
              read_pair.read2_passed_detection = true;
            }
          }
        }
        // Second alignment attempt, only if at least one read qualified.
        if (read_pair.read1_passed_detection ||
            read_pair.read2_passed_detection) {
          if (pAligner->ProcessReadPair(&read_pair, &aln_err, &aln_messages)) {
            aligned = true;
            if (debug) { // if aligned, what was the repseq we aligned to
              PrintMessageDieOnError(GetReadDebug(read_pair, det_err, det_messages, aln_err, aln_messages)+ " (aligned-round-2)", DEBUG);
            }
          }
        }
      }
      if (aligned) {
        samWriter.WriteRecord(read_pair);
      } else {
        if (debug) { // if didn't align, print this
          PrintMessageDieOnError(GetReadDebug(read_pair, det_err, det_messages, aln_err, aln_messages)+ " (not-aligned)", DEBUG);
        }
      }
    }
    delete pReader;
    // Report the running total once each file is finished.
    stringstream msg;
    msg << "Processed " << num_reads_processed << ' ' << unit_name;
    PrintMessageDieOnError(msg.str(), PROGRESS);
    if (using_s3) {
      // Remove the local copies of the fetched files.
      // NOTE(review): the file names are interpolated into a command line
      // handed to system(); names containing shell metacharacters would be
      // interpreted by the shell.
      string rmcmd = "rm " + file1;
      if (paired && !bam) {
        rmcmd += "; rm ";
        rmcmd += file2;
      }
      if (s3debug) {
        PrintMessageDieOnError("S3 debug: " + rmcmd, PROGRESS);
      } else {
        if (system(rmcmd.c_str()) != 0) {
          PrintMessageDieOnError("Problem deleting file", ERROR);
        }
      }
    }
  }
  delete pDetector;
  delete pAligner;
  run_info.num_processed_units = num_reads_processed;
}