Example #1
0
/* next_fastqs
   Reset every buffer and both lengths in CURR_SQP, then read one record
   from the forward fastq (FFQ) and one from the reverse fastq (RFQ) into
   it. P64 is handed to read_fastq unchanged.

   Returns true only when both read_fastq calls report status 1 and
   f_r_id_check accepts the two IDs. In that case rc_rseq / rc_rqual are
   filled with a copy of rseq / rqual that has been passed through
   revcom_seq / rev_qual. Returns false in every other case.
 */
inline bool next_fastqs( gzFile ffq, gzFile rfq, SQP curr_sqp, bool p64 ) {
  size_t fwd_id_len = 0;
  size_t rev_id_len = 0;
  int fwd_status; // status returned for the forward record
  int rev_status; // status returned for the reverse record

  // start from a clean slate so nothing from the previous pair leaks through
  curr_sqp->rlen = 0;
  curr_sqp->flen = 0;
  memset(curr_sqp->fid,         0, MAX_SEQ_LEN);
  memset(curr_sqp->rid,         0, MAX_SEQ_LEN);
  memset(curr_sqp->fseq,        0, MAX_SEQ_LEN);
  memset(curr_sqp->rseq,        0, MAX_SEQ_LEN);
  memset(curr_sqp->rc_rseq,     0, MAX_SEQ_LEN);
  memset(curr_sqp->fqual,       0, MAX_SEQ_LEN);
  memset(curr_sqp->rqual,       0, MAX_SEQ_LEN);
  memset(curr_sqp->rc_rqual,    0, MAX_SEQ_LEN);
  memset(curr_sqp->merged_seq,  0, MAX_SEQ_LEN+MAX_SEQ_LEN);
  memset(curr_sqp->merged_qual, 0, MAX_SEQ_LEN+MAX_SEQ_LEN);

  // both files are always advanced, even if the forward read fails
  fwd_status = read_fastq( ffq, curr_sqp->fid, curr_sqp->fseq,
      curr_sqp->fqual, &fwd_id_len, &(curr_sqp->flen), p64 );
  rev_status = read_fastq( rfq, curr_sqp->rid, curr_sqp->rseq,
      curr_sqp->rqual, &rev_id_len, &(curr_sqp->rlen), p64 );

  if ( fwd_status != 1 || rev_status != 1 ) {
    return false;
  }
  // IDs are only compared once both records were read successfully
  if ( !f_r_id_check( curr_sqp->fid, fwd_id_len, curr_sqp->rid, rev_id_len ) ) {
    return false;
  }

  // build the reverse-complement view of the reverse read
  strncpy(curr_sqp->rc_rseq, curr_sqp->rseq, curr_sqp->rlen+1);
  strncpy(curr_sqp->rc_rqual, curr_sqp->rqual, curr_sqp->rlen+1);
  rev_qual(curr_sqp->rc_rqual, curr_sqp->rlen);
  revcom_seq(curr_sqp->rc_rseq, curr_sqp->rlen);
  return true;
}
Example #2
0
/**
 * Look for adapters by read overlap.
 *
 * compute_ol is called with the reverse-complemented reverse read
 * (rc_rseq / rc_rqual, length rlen) as its first sequence and the forward
 * read (fseq / fqual, length flen) as its second, using the read-vs-read
 * match tables and uniqueness checking switched on.
 *
 * @param sqp                   read pair; trimmed in place on success
 * @param min_ol_adapter        minimum adapter overlap; used to derive the
 *                              minimum overlap handed to compute_ol
 * @param min_match_adapter     not used by this function
 * @param max_mismatch_adapter  not used by this function
 * @param min_match_reads       passed to compute_ol
 * @param max_mismatch_reads    passed to compute_ol
 * @param qcut                  passed to compute_ol
 * @return true when a non-zero shift was found and the reads were trimmed;
 *         false when compute_ol returned CODE_NOMATCH, CODE_AMBIGUOUS or 0.
 */
bool read_olap_adapter_trim(SQP sqp, size_t min_ol_adapter,
    unsigned short min_match_adapter[MAX_SEQ_LEN+1],
    unsigned short max_mismatch_adapter[MAX_SEQ_LEN+1],
    unsigned short min_match_reads[MAX_SEQ_LEN+1],
    unsigned short max_mismatch_reads[MAX_SEQ_LEN+1],
    char qcut){
  ////////////
  // Look at the adapter overhang
  // Starting from our minimum adapter overlap
  // check to see if there is total overlap with
  //Round1:
  //       ---------- Subj
  //       ---------- Query
  //Round2:
  //      ----------  Subj
  //     ----------   Query
  //...
  //we can get this effect by swapping the query and subj, and then have a high minimum
  //overlap
  char *queryseq= sqp->rc_rseq;
  char *queryqual= sqp->rc_rqual;
  char *subjseq= sqp->fseq;
  char *subjqual= sqp->fqual;
  int querylen = sqp->rlen;
  int subjlen = sqp->flen;

  // min_ol_adapter is a size_t: subtracting it directly from an int is done
  // in unsigned arithmetic, so a "negative" result wraps to a huge value and
  // a max(0, ...) clamp never fires. Do the arithmetic in int and clamp
  // explicitly instead.
  int min_olap = min(querylen, subjlen) - (int)min_ol_adapter - 1;
  if(min_olap < 0){
    min_olap = 0;
  }

  int ppos = compute_ol(
      queryseq, queryqual, querylen,
      subjseq, subjqual, subjlen,
      min_olap,
      min_match_reads, max_mismatch_reads,
      true, qcut ); //pass true here so ambiguous matches are avoided

  if(ppos == CODE_NOMATCH || ppos == CODE_AMBIGUOUS){
    return false;
  }
  if(ppos == 0){
    //the reads line up exactly: no adapter
    return false;
  }

  //we have a match, trim the adapter!
  //ppos gives us the shift to the left of the query
  // One case:
  //   ----X------- fread
  // -X----         rread
  // Another case:
  //   ---X-        fread
  // -X---          rread
  // Another case:
  //   ----         fread
  // -X----X-       rread

  //first calc rlen after the first clip
  sqp->rlen -= ppos;

  if(sqp->rlen <= sqp->flen){
    //in the first two cases shown above, the other cut point is just the
    //new rlen
    sqp->flen = sqp->rlen;
  }else{
    //otherwise leave sqp->flen alone
    // Another case:
    //   ----         fread
    // -X----X---     rread
    // make initial cut to rc read
    sqp->rc_rqual[ppos + sqp->flen] = '\0';
    sqp->rc_rseq[ppos + sqp->flen] = '\0';
    //move RC reads into reg place and reverse them
    strncpy(sqp->rseq,sqp->rc_rseq,ppos + sqp->flen+1);
    strncpy(sqp->rqual,sqp->rc_rqual,ppos + sqp->flen+1);
    rev_qual(sqp->rqual, ppos + sqp->flen);
    revcom_seq(sqp->rseq, ppos + sqp->flen);

    //now we have our end cut in place in the regular reads
    sqp->rlen = sqp->flen;
  }

  //now cases have been handled and length has been determined
  sqp->fseq[sqp->flen] = '\0';
  sqp->fqual[sqp->flen] = '\0';
  sqp->rseq[sqp->rlen] = '\0';
  sqp->rqual[sqp->rlen] = '\0';
  // now re-reverse complement the sequences
  strncpy(sqp->rc_rseq,sqp->rseq,sqp->rlen+1);
  strncpy(sqp->rc_rqual,sqp->rqual,sqp->rlen+1);
  rev_qual(sqp->rc_rqual, sqp->rlen);
  revcom_seq(sqp->rc_rseq, sqp->rlen);
  return true;
}
Example #3
0
/**
 * Helper for make_blunt_ends: true when every character of ROW in
 * columns [from, len) is the gap character '-'.
 */
static bool blunt_rest_is_gaps(const char *row, int from, int len){
  int k;
  for(k = from; k < len; k++){
    if(row[k] != '-'){
      return false;
    }
  }
  return true;
}

/**
 * After performing overlap post adapter trimming, walk the alignment in
 * ALN column by column and rewrite fseq/fqual and rc_rseq/rc_rqual in
 * place so that the overhanging ends are dropped, the same way as when
 * reads are merged, while the kept nucleotides and qualities are copied
 * through unchanged (bases are upper-cased).
 *
 * On return flen / rlen hold the new lengths, and rseq / rqual are
 * rebuilt from the trimmed rc_rseq / rc_rqual via revcom_seq / rev_qual.
 */
void make_blunt_ends(SQP sqp, AlnAln *aln){
  char *fwd_row = aln->out1;
  char *rev_row = aln->out2;
  int ncols = strlen(aln->out1);
  int src_f = 0;  // next unread position in the original forward read
  int src_r = 0;  // next unread position in the original rc reverse read
  int dst_f = 0;  // next write position in the trimmed forward read
  int dst_r = 0;  // next write position in the trimmed rc reverse read
  bool in_leading_overhang = true; // stays set until the first base/base column
  int col;

  for(col = 0; col < ncols; col++){
    char fbase = toupper(fwd_row[col]);
    char rbase = toupper(rev_row[col]);
    char fq = sqp->fqual[src_f];
    char rq = sqp->rc_rqual[src_r];

    if(isXDNA(fbase) && isXDNA(rbase)){
      // both rows carry a base: keep both and leave the leading overhang
      sqp->fseq[dst_f] = fbase;
      sqp->fqual[dst_f] = fq;
      sqp->rc_rseq[dst_r] = rbase;
      sqp->rc_rqual[dst_r] = rq;
      in_leading_overhang = false;
      src_f++;
      src_r++;
      dst_f++;
      dst_r++;
      continue;
    }

    if(isXDNA(fbase)){
      // the reverse row does not carry a base in this column
      if(!in_leading_overhang){
        sqp->fseq[dst_f] = fbase;
        sqp->fqual[dst_f] = fq;
        // stop before counting this base if the reverse row is gaps from here on
        if(blunt_rest_is_gaps(rev_row, col, ncols)){
          break;
        }
        dst_f++;
      }
      src_f++;
      continue;
    }

    if(isXDNA(rbase)){
      // the forward row does not carry a base in this column
      if(!in_leading_overhang){
        sqp->rc_rseq[dst_r] = rbase;
        sqp->rc_rqual[dst_r] = rq;
        // stop before counting this base if the forward row is gaps from here on
        if(blunt_rest_is_gaps(fwd_row, col, ncols)){
          break;
        }
        dst_r++;
      }
      src_r++;
    }
  }

  // terminate the trimmed forward read
  sqp->flen = dst_f;
  sqp->fseq[dst_f] = '\0';
  sqp->fqual[dst_f] = '\0';

  // terminate the trimmed rc reverse read
  sqp->rlen = dst_r;
  sqp->rc_rseq[dst_r] = '\0';
  sqp->rc_rqual[dst_r] = '\0';

  // regenerate the plain reverse read from its trimmed rc form
  strncpy(sqp->rseq,sqp->rc_rseq,sqp->rlen+1);
  strncpy(sqp->rqual,sqp->rc_rqual,sqp->rlen+1);
  rev_qual( sqp->rqual, sqp->rlen );
  revcom_seq( sqp->rseq, sqp->rlen);
}
Example #4
0
/**
 * adapter_trim:
 *
 * Look for adapter/primer sequence in a read pair and trim it. Three
 * checks are tried in order:
 *
 *  1. compute_ol with the primer as first sequence and the read as second,
 *     requiring an overlap of at least min(primer length, read length) - 5
 *     (never below 0). A non-negative result for either read empties both
 *     reads (lengths set to 0, rc buffers emptied) and returns true.
 *  2. compute_ol with the read first and the primer second, using
 *     min_ol_adapter. If either result differs from CODE_NOMATCH, each read
 *     with a non-negative position is truncated there, rc_rseq / rc_rqual
 *     are rebuilt from the reverse read, and true is returned.
 *  3. Otherwise the result of read_olap_adapter_trim is returned.
 *
 * @param sqp                        read pair, modified in place
 * @param min_ol_adapter             minimum overlap for check 2 (also passed
 *                                   on to read_olap_adapter_trim)
 * @param forward_primer             primer searched for in the forward read
 * @param forward_primer_dummy_qual  quality string passed with forward_primer
 * @param forward_primer_len         length of forward_primer
 * @param reverse_primer             primer searched for in the reverse read
 * @param reverse_primer_dummy_qual  quality string passed with reverse_primer
 * @param reverse_primer_len         length of reverse_primer
 * @param min_match_adapter          match table for the primer checks
 * @param max_mismatch_adapter       mismatch table for the primer checks
 * @param min_match_reads            passed to read_olap_adapter_trim
 * @param max_mismatch_reads         passed to read_olap_adapter_trim
 * @param qcut                       passed to compute_ol
 * @return true when adapter sequence was detected by any of the checks.
 */
bool adapter_trim(SQP sqp, size_t min_ol_adapter,
    char *forward_primer, char *forward_primer_dummy_qual,
    int forward_primer_len,
    char *reverse_primer, char *reverse_primer_dummy_qual,
    int reverse_primer_len,
    unsigned short min_match_adapter[MAX_SEQ_LEN+1],
    unsigned short max_mismatch_adapter[MAX_SEQ_LEN+1],
    unsigned short min_match_reads[MAX_SEQ_LEN+1],
    unsigned short max_mismatch_reads[MAX_SEQ_LEN+1],
    char qcut){
  //adapters on reads if the insert size is less than the read length, the adapter
  // appears at the end of the sequence.

  // Minimum overlaps for the "adapter at the very start" check. The explicit
  // int casts keep the subtraction signed no matter how the length fields
  // are declared; with an unsigned length, "len - 5" would wrap for short
  // inputs and a max(..., 0) clamp would have no effect.
  int fwd_head_olap = min(forward_primer_len, (int)sqp->flen) - 5;
  if(fwd_head_olap < 0){
    fwd_head_olap = 0;
  }
  int rev_head_olap = min(reverse_primer_len, (int)sqp->rlen) - 5;
  if(rev_head_olap < 0){
    rev_head_olap = 0;
  }

  /**
   * First check for adapter match before the first position of the read
   */
  int pfpos = compute_ol(
      forward_primer, forward_primer_dummy_qual, forward_primer_len,
      sqp->fseq,sqp->fqual,sqp->flen,
      fwd_head_olap, min_match_adapter, max_mismatch_adapter,
      false, qcut);

  int prpos = compute_ol(
      reverse_primer, reverse_primer_dummy_qual, reverse_primer_len,
      sqp->rseq,sqp->rqual,sqp->rlen,
      rev_head_olap, min_match_adapter, max_mismatch_adapter,
      false, qcut);

  if(pfpos >= 0 || prpos >= 0){
    //yikes, a match to the adapter at the first position!
    //nothing usable is left, so empty the whole pair
    sqp->fseq[0] = '\0';
    sqp->fqual[0] = '\0';
    sqp->flen = 0;
    sqp->rseq[0] = '\0';
    sqp->rqual[0] = '\0';
    sqp->rlen = 0;
    sqp->rc_rqual[0] = '\0';
    sqp->rc_rseq[0] = '\0';
    return true;
  }

  /**
   * now check for the adapter after the first position of the read
   */
  int fpos = compute_ol(sqp->fseq,sqp->fqual,sqp->flen,
      forward_primer, forward_primer_dummy_qual, forward_primer_len,
      min_ol_adapter, min_match_adapter, max_mismatch_adapter,
      false, qcut);
  int rpos = compute_ol(sqp->rseq,sqp->rqual,sqp->rlen,
      reverse_primer, reverse_primer_dummy_qual, reverse_primer_len,
      min_ol_adapter, min_match_adapter, max_mismatch_adapter,
      false, qcut);
  if(fpos != CODE_NOMATCH || rpos != CODE_NOMATCH){
    // trim adapters; only a non-negative position is a usable cut point
    if(fpos >= 0){
      sqp->fseq[fpos] = '\0';
      sqp->fqual[fpos] = '\0';
      sqp->flen = fpos;
    }
    if(rpos >= 0){
      sqp->rseq[rpos] = '\0';
      sqp->rqual[rpos] = '\0';
      sqp->rlen = rpos;
    }
    // now re-reverse complement the sequences
    strncpy(sqp->rc_rseq,sqp->rseq,sqp->rlen+1);
    strncpy(sqp->rc_rqual,sqp->rqual,sqp->rlen+1);
    rev_qual(sqp->rc_rqual, sqp->rlen);
    revcom_seq(sqp->rc_rseq, sqp->rlen);
    //adapters present
    return true;
  }

  // no direct primer hit: fall back to looking at the read-vs-read overlap
  return read_olap_adapter_trim(sqp, min_ol_adapter,
      min_match_adapter, max_mismatch_adapter,
      min_match_reads, max_mismatch_reads,
      qcut);
}
Example #5
0
/* Copy the argument SRC of command-line option -OPT into DST, which has
   room for DSTSZ bytes including the terminator. Command-line text is
   untrusted, so an argument that does not fit ends the program with an
   error message instead of overflowing the buffer. */
static void copy_opt_arg(char *dst, size_t dstsz, const char *src, int opt) {
  size_t n = strlen(src);
  if (n >= dstsz) {
    fprintf(stderr, "Argument to -%c is too long (%lu characters, limit %lu)\n",
        opt, (unsigned long)n, (unsigned long)(dstsz - 1));
    exit(1);
  }
  strcpy(dst, src); // length verified above
}

/* Program entry point.
 *
 * Parses the command line, opens the input/output files, then loops over
 * read pairs returned by next_fastqs. For each pair it
 *   - locally aligns each read against its primer (aln_stdaln_aux) and runs
 *     adapter_trim;
 *   - if adapters were found: trims, globally aligns the two reads and,
 *     depending on the alignment score and on -s, writes a merged read, a
 *     blunt-ended pair, or counts the pair as discarded;
 *   - otherwise: optionally tries read_merge (-s) and writes the merged read
 *     or the untouched pair.
 * Pairs whose reads end up shorter than the minimum length (-L) are
 * discarded; when -3/-4 are given the untrimmed originals of discarded
 * pairs are written to those files.
 *
 * Required options: -f, -r (input fastq) and -1, -2 (output fastq).
 * Returns 0 after processing all pairs, 1 on an option error detected here.
 */
int main( int argc, char* argv[] ) {
  unsigned long long num_pairs = 0;
  unsigned long long num_merged = 0;
  unsigned long long num_adapter = 0;
  unsigned long long num_discarded = 0;
  unsigned long long max_pretty_print = DEF_MAX_PRETTY_PRINT;
  unsigned long long num_pretty_print = 0;
  int adapter_thresh = DEF_ADAPTER_SCORE_THRES;
  int read_thresh = DEF_READ_SCORE_THRES;
  clock_t start, end;
  extern char* optarg;
  bool p64 = false;
  // file names start out empty so they are never read uninitialised
  char forward_fn[MAX_FN_LEN] = "";
  char reverse_fn[MAX_FN_LEN] = "";
  char forward_out_fn[MAX_FN_LEN] = "";
  char reverse_out_fn[MAX_FN_LEN] = "";
  char forward_discard_fn[MAX_FN_LEN] = "";
  char reverse_discard_fn[MAX_FN_LEN] = "";
  char merged_out_fn[MAX_FN_LEN] = "";
  bool do_read_merging = false;
  bool print_overhang = false;
  bool write_discard = false;
  // one flag per option so a repeated option cannot stand in for a missing one
  bool have_forward_in = false;
  bool have_reverse_in = false;
  bool have_forward_out = false;
  bool have_reverse_out = false;
  bool have_forward_discard = false;
  bool have_reverse_discard = false;
  char forward_primer[MAX_SEQ_LEN+1];
  strcpy(forward_primer, DEF_FORWARD_PRIMER); //set default
  char forward_primer_dummy_qual[MAX_SEQ_LEN+1];
  char reverse_primer[MAX_SEQ_LEN+1];
  strcpy(reverse_primer, DEF_REVERSE_PRIMER); //set default
  char reverse_primer_dummy_qual[MAX_SEQ_LEN+1];
  int i;
  for(i=0;i<MAX_SEQ_LEN+1;i++){
    forward_primer_dummy_qual[i] = 'N';//phred score of 45
    reverse_primer_dummy_qual[i] = 'N';
  }
  int ich;
  int min_ol_adapter = DEF_OL2MERGE_ADAPTER;
  int min_ol_reads = DEF_OL2MERGE_READS;
  unsigned short int min_read_len =DEF_MIN_READ_LEN;
  float min_match_adapter_frac = DEF_MIN_MATCH_ADAPTER;
  float min_match_reads_frac = DEF_MIN_MATCH_READS;
  float max_mismatch_adapter_frac = DEF_MAX_MISMATCH_ADAPTER;
  float max_mismatch_reads_frac = DEF_MAX_MISMATCH_READS;

  float read_frac_thresh = DEF_READ_GAP_FRAC_CUTOFF;
  unsigned short max_mismatch_adapter[MAX_SEQ_LEN+1];
  unsigned short max_mismatch_reads[MAX_SEQ_LEN+1];
  unsigned short min_match_adapter[MAX_SEQ_LEN+1];
  unsigned short min_match_reads[MAX_SEQ_LEN+1];
  char qcut = (char)DEF_QCUT+33;
  bool pretty_print = false;
  char pretty_print_fn[MAX_FN_LEN+1] = "";
  SQP sqp = SQP_init();
  char untrim_fseq[MAX_SEQ_LEN+1];
  char untrim_fqual[MAX_SEQ_LEN+1];
  char untrim_rseq[MAX_SEQ_LEN+1];
  char untrim_rqual[MAX_SEQ_LEN+1];
  /* No args - help!  */
  if ( argc == 1 ) {
    help(argv[0]);
  }
  while( (ich=getopt( argc, argv, "f:r:1:2:3:4:q:A:s:y:B:O:E:x:M:N:L:o:m:b:w:W:p:P:X:Q:t:e:Z:n:6gh" )) != -1 ) {
    switch( ich ) {

    //REQUIRED ARGUMENTS
    case 'f' :
      have_forward_in = true;
      copy_opt_arg( forward_fn, sizeof forward_fn, optarg, ich );
      break;
    case 'r' :
      have_reverse_in = true;
      copy_opt_arg( reverse_fn, sizeof reverse_fn, optarg, ich );
      break;
    case '1' :
      have_forward_out = true;
      copy_opt_arg( forward_out_fn, sizeof forward_out_fn, optarg, ich );
      break;
    case '2' :
      have_reverse_out = true;
      copy_opt_arg( reverse_out_fn, sizeof reverse_out_fn, optarg, ich );
      break;

      //OPTIONAL GENERAL ARGUMENTS
    case '3' :
      have_forward_discard = true;
      copy_opt_arg( forward_discard_fn, sizeof forward_discard_fn, optarg, ich );
      break;
    case '4' :
      have_reverse_discard = true;
      copy_opt_arg( reverse_discard_fn, sizeof reverse_discard_fn, optarg, ich );
      break;
    case 'h' :
      help(argv[0]);
      break;
    case '6' :
      p64 = true;
      break;
    case 'q' :
      qcut = atoi(optarg)+33;
      break;
    case 'L' :
      min_read_len = atoi(optarg);
      break;

      //OPTIONAL ADAPTER/PRIMER TRIMMING ARGUMENTS
    case 'A':
      copy_opt_arg( forward_primer, sizeof forward_primer, optarg, ich );
      break;
    case 'B':
      copy_opt_arg( reverse_primer, sizeof reverse_primer, optarg, ich );
      break;
    case 'O':
      min_ol_adapter = atoi(optarg);
      break;
    case 'M':
      max_mismatch_adapter_frac = atof(optarg);
      break;
    case 'N':
      min_match_adapter_frac = atof(optarg);
      break;
    case 'b':
      aln_param_nt2nt.band_width = atoi(optarg);
      break;
    case 'Q':
      aln_param_nt2nt.gap_open = atoi(optarg);
      break;
    case 't':
      aln_param_nt2nt.gap_ext = atoi(optarg);
      break;
    case 'e':
      aln_param_nt2nt.gap_end = atoi(optarg);
      break;
    case 'Z':
      adapter_thresh = atoi(optarg);
      break;


    case 'w':
      aln_param_rd2rd.band_width = atoi(optarg);
      break;
    case 'W':
      aln_param_rd2rd.gap_open = atoi(optarg);
      break;
    case 'p':
      aln_param_rd2rd.gap_ext = atoi(optarg);
      break;
    case 'P':
      aln_param_rd2rd.gap_end = atoi(optarg);
      break;
    case 'X':
      read_frac_thresh = atof(optarg);
      break;

      //OPTIONAL MERGING ARGUMENTS
    case 'y' :
      maximum_quality = optarg[0];
      break;
    case 'g' :
      print_overhang = true;
      break;
    case 's' :
      do_read_merging = true;
      copy_opt_arg( merged_out_fn, sizeof merged_out_fn, optarg, ich );
      break;
    case 'o':
      min_ol_reads = atoi(optarg);
      break;
    case 'm':
      max_mismatch_reads_frac = atof(optarg);
      break;
    case 'n':
      min_match_reads_frac = atof(optarg);
      break;
    case 'E':
      pretty_print = true;
      copy_opt_arg( pretty_print_fn, sizeof pretty_print_fn, optarg, ich );
      break;
    case 'x':
      max_pretty_print = atol(optarg);
      break;


    default :
      help(argv[0]);
    }
  }
  if(!(have_forward_in && have_reverse_in && have_forward_out && have_reverse_out)){
    fprintf(stderr, "Missing a required argument!\n");
    help(argv[0]);
    return 1; // only reached if help() returns
  }
  if(have_forward_discard != have_reverse_discard){
    // both discard files are written together, so one without the other
    // would leave a file name unset
    fprintf(stderr, "Options -3 and -4 must be given together!\n");
    help(argv[0]);
    return 1; // only reached if help() returns
  }
  write_discard = have_forward_discard && have_reverse_discard;
  start = clock();

  //Calculate table matching overlap length to min matches and max mismatches
  for(i=0;i<MAX_SEQ_LEN+1;i++){
    max_mismatch_reads[i] = floor(((float)i)*max_mismatch_reads_frac);
    max_mismatch_adapter[i] = floor(((float)i)*max_mismatch_adapter_frac);
    min_match_reads[i] = ceil(((float)i)*min_match_reads_frac);
    min_match_adapter[i] = ceil(((float)i)*min_match_adapter_frac);
  }
  //get length of forward and reverse primers
  int forward_primer_len = strlen(forward_primer);
  int reverse_primer_len = strlen(reverse_primer);


  gzFile ffq = fileOpen(forward_fn, "r");
  gzFile ffqw = fileOpen(forward_out_fn,"w");
  gzFile rfq = fileOpen(reverse_fn, "r");
  gzFile rfqw = fileOpen(reverse_out_fn,"w");
  gzFile mfqw = NULL;
  gzFile ppaw = NULL;
  gzFile dffqw = NULL;
  gzFile drfqw = NULL;
  if(do_read_merging)
    mfqw = fileOpen(merged_out_fn,"w");
  if(pretty_print)
    ppaw = fileOpen(pretty_print_fn,"w");
  if(write_discard){
    dffqw = fileOpen(forward_discard_fn,"w");
    drfqw = fileOpen(reverse_discard_fn,"w");
  }


  /**
   * Loop over all of the reads
   */
  while(next_fastqs( ffq, rfq, sqp, p64 )){ //returns false when done
    update_spinner(num_pairs++);


    AlnAln *faaln, *raaln;
    AlnAln *fraln = NULL; // only allocated on the adapter path

    //save a copy of the original sequences/qualities first
    strcpy(untrim_fseq,sqp->fseq);
    strcpy(untrim_fqual,sqp->fqual);
    strcpy(untrim_rseq,sqp->rseq);
    strcpy(untrim_rqual,sqp->rqual);

    faaln = aln_stdaln_aux(sqp->fseq, forward_primer, &aln_param_nt2nt,
        ALN_TYPE_LOCAL, adapter_thresh , sqp->flen, forward_primer_len);
    raaln = aln_stdaln_aux(sqp->rseq, reverse_primer, &aln_param_nt2nt,
        ALN_TYPE_LOCAL, adapter_thresh, sqp->rlen, reverse_primer_len);

    //check for direct adapter match.
    //adapter_trim is the first operand, so it always runs (and may trim sqp)
    if(adapter_trim(sqp, min_ol_adapter,
        forward_primer, forward_primer_dummy_qual,
        forward_primer_len,
        reverse_primer, reverse_primer_dummy_qual,
        reverse_primer_len,
        min_match_adapter,
        max_mismatch_adapter,
        min_match_reads,
        max_mismatch_reads,
        qcut) ||
        faaln->score >= adapter_thresh ||
        raaln->score >= adapter_thresh){
      num_adapter++; //adapter present
      //print it if user wants
      if(pretty_print && num_pretty_print < max_pretty_print){
        if(faaln->score >= adapter_thresh){
          num_pretty_print++;
          pretty_print_alignment_stdaln(ppaw,sqp,faaln,true,false,false);
        }
        if(raaln->score >= adapter_thresh){
          num_pretty_print++;
          pretty_print_alignment_stdaln(ppaw,sqp,raaln,false,true,false);
        }
      }

      //do stuff to it
      //assume full length adapter and squish it down to the read with no gaps
      int rpos,fpos;
      rpos = fpos = (- MAX_SEQ_LEN); // negative means "no alignment-based cut"
      if(faaln->score >= adapter_thresh){
        fpos = max(faaln->start1 - faaln->start2,0);
      }
      if(raaln->score >= adapter_thresh){
        rpos = max(raaln->start1 - raaln->start2,0);
      }

      //make rlen the minimum of the two adapter search methods
      if(rpos >= 0){
        sqp->rlen = min(sqp->rlen,rpos);
      }

      //make flen the minimum of the two adapter search methods
      if(fpos >= 0){
        sqp->flen = min(sqp->flen,fpos);
      }

      if(sqp->flen < min_read_len || sqp->rlen < min_read_len){
        num_discarded++;
        if(write_discard){
          write_fastq(dffqw, sqp->fid, untrim_fseq, untrim_fqual);
          write_fastq(drfqw, sqp->rid, untrim_rseq, untrim_rqual);
        }
        //no read-read alignment was made yet, so skip freeing it
        goto CLEAN_ADAPTERS;
      }else{ //trim the adapters
        sqp->fseq[sqp->flen] = '\0';
        sqp->fqual[sqp->flen] = '\0';
        sqp->rseq[sqp->rlen] = '\0';
        sqp->rqual[sqp->rlen] = '\0';
        strncpy(sqp->rc_rseq,sqp->rseq,sqp->rlen+1); //move regular reads now trimmed into RC read's place
        strncpy(sqp->rc_rqual,sqp->rqual,sqp->rlen+1);
        rev_qual(sqp->rc_rqual, sqp->rlen);        //and re-reverse the RC reads
        revcom_seq(sqp->rc_rseq, sqp->rlen);
      }


      //do a nice global alignment between two reads, and print consensus
      fraln = aln_stdaln_aux(sqp->fseq, sqp->rc_rseq, &aln_param_rd2rd,
          ALN_TYPE_GLOBAL, 1, sqp->flen, sqp->rlen);

      //calculate the minimum score we are willing to accept to merge the reads:
      //the summed read lengths minus gap-extension, gap-open and gap-end penalties

      read_thresh = (((int)sqp->flen) + ((int)sqp->rlen)) -
          (((int)sqp->flen) * read_frac_thresh * aln_param_rd2rd.gap_ext) -
          (((int)sqp->rlen) * read_frac_thresh * aln_param_rd2rd.gap_ext) -
          (aln_param_rd2rd.gap_open*2) - (aln_param_rd2rd.gap_end*2);
      //now lets put something useful in the alignment suboptimal score thing since right now it
      //is just left blank:
      fraln->subo = read_thresh;

      if(do_read_merging && fraln->score > read_thresh){
        //if we want read merging,
        //and the alignment score is better than the threshold just calculated...

        //write the merged sequence
        fill_merged_sequence(sqp, fraln, true);
        if(pretty_print && num_pretty_print < max_pretty_print){
          num_pretty_print++;
          pretty_print_alignment_stdaln(ppaw,sqp,fraln,false,false,true);
        }
        if(strlen(sqp->merged_seq) >= min_read_len && strlen(sqp->merged_qual) >= min_read_len){
          num_merged++;
          write_fastq(mfqw,sqp->fid,sqp->merged_seq,sqp->merged_qual);
        }
        else{
          num_discarded++;
          if(write_discard){
            write_fastq(dffqw, sqp->fid, untrim_fseq, untrim_fqual);
            write_fastq(drfqw, sqp->rid, untrim_rseq, untrim_rqual);
          }
        }
      }else if(fraln->score > read_thresh){
        // we know that the adapters are present, trimmed, and the resulting
        // read lengths are both long enough to print.
        // We also know that we aren't doing merging.
        // Now we just need to print.
        if(pretty_print && num_pretty_print < max_pretty_print){
          num_pretty_print++;
          pretty_print_alignment_stdaln(ppaw,sqp,fraln,false,false,true);
        }

        //do end polishing to take care of examples like the following:
        //          Read Alignment Score:59, Suboptimal Score:-85
        //          ID:HWI-ST593:1:1101:14566:7002#ACA/1
        //          READ1: ------------ATACAACTCGCTGACTTTGTCCTGGCATTTGACATATGCCTCGTAGTCTGCAAAGACTTTAAACCGGTCATGGTGGAACAGCATGTTGA
        //                             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
        //          READ2: CTCTTCCGATCTATACAACTCGCTGACTTTGTCCTGGCATTTGACATATGCCTCGTAGTCTGCAAAGACTTTAAACCGGTCATGGTGGAACAGCATGTTG-

        make_blunt_ends(sqp,fraln);

        if(strlen(sqp->fseq) >= min_read_len &&
            strlen(sqp->fqual) >= min_read_len &&
            strlen(sqp->rseq) >= min_read_len &&
            strlen(sqp->rqual) >= min_read_len){

          write_fastq(ffqw, sqp->fid, sqp->fseq, sqp->fqual);
          write_fastq(rfqw, sqp->rid, sqp->rseq, sqp->rqual);
        }else{
          num_discarded++;
          if(write_discard){
            write_fastq(dffqw, sqp->fid, untrim_fseq, untrim_fqual);
            write_fastq(drfqw, sqp->rid, untrim_rseq, untrim_rqual);
          }
        }


      }else{ //there was a bad looking read-read alignment, so lets not risk it and junk it
        num_discarded++;
        if(write_discard){
          write_fastq(dffqw, sqp->fid, untrim_fseq, untrim_fqual);
          write_fastq(drfqw, sqp->rid, untrim_rseq, untrim_rqual);
        }
      }
    }else{
      //no adapters present
      //check for strong read overlap to assist trimming ends of adapters from end of read
      if(do_read_merging){
        if(read_merge(sqp, min_ol_reads, min_match_reads, max_mismatch_reads, qcut)){
          //print merged output
          if(strlen(sqp->merged_seq) >= min_read_len &&
              strlen(sqp->merged_qual) >= min_read_len){
            num_merged++;
            write_fastq(mfqw,sqp->fid,sqp->merged_seq,sqp->merged_qual);
            if(pretty_print && num_pretty_print < max_pretty_print){
              num_pretty_print++;
              pretty_print_alignment(ppaw,sqp,qcut,false); //false b/c merged input in fixed order
            }
          }else{
            num_discarded++;
            if(write_discard){
              write_fastq(dffqw, sqp->fid, untrim_fseq, untrim_fqual);
              write_fastq(drfqw, sqp->rid, untrim_rseq, untrim_rqual);
            }
          }
        }else{
          //no significant overlap so just write them
          if(strlen(sqp->fseq) >= min_read_len &&
              strlen(sqp->fqual) >= min_read_len &&
              strlen(sqp->rseq) >= min_read_len &&
              strlen(sqp->rqual) >= min_read_len){
            write_fastq(ffqw, sqp->fid, sqp->fseq, sqp->fqual);
            write_fastq(rfqw, sqp->rid, sqp->rseq, sqp->rqual);
          }else{
            num_discarded++;
            if(write_discard){
              write_fastq(dffqw, sqp->fid, untrim_fseq, untrim_fqual);
              write_fastq(drfqw, sqp->rid, untrim_rseq, untrim_rqual);
            }
          }

        }
        //done
        goto CLEAN_ADAPTERS;
      }else{ //just write reads to output fastqs
        if(strlen(sqp->fseq) >= min_read_len &&
            strlen(sqp->fqual) >= min_read_len &&
            strlen(sqp->rseq) >= min_read_len &&
            strlen(sqp->rqual) >= min_read_len){
          write_fastq(ffqw, sqp->fid, sqp->fseq, sqp->fqual);
          write_fastq(rfqw, sqp->rid, sqp->rseq, sqp->rqual);
        }else{
          num_discarded++;
          if(write_discard){
            write_fastq(dffqw, sqp->fid, untrim_fseq, untrim_fqual);
            write_fastq(drfqw, sqp->rid, untrim_rseq, untrim_rqual);
          }
        }
        goto CLEAN_ADAPTERS;
      }
    }


    /**
     * Section for hierarchical cleanup
     *
     * In every case we will at least have to free up the alignment between the adapter and two reads.
     * however in some cases there will be an additional alignment between the two reads. We can do
     * good cleanup in this case with gotos
     */
    aln_free_AlnAln(fraln);

    CLEAN_ADAPTERS:
    aln_free_AlnAln(faaln);
    aln_free_AlnAln(raaln);

    //End the loop over reads
  }
  end = clock();
  double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
  // the counters are unsigned long long, so print them with %llu
  fprintf(stderr,"\nPairs Processed:\t%llu\n",num_pairs);
  fprintf(stderr,"Pairs Merged:\t%llu\n",num_merged);
  fprintf(stderr,"Pairs With Adapters:\t%llu\n",num_adapter);
  fprintf(stderr,"Pairs Discarded:\t%llu\n",num_discarded);
  fprintf(stderr,"CPU Time Used (Minutes):\t%lf\n",cpu_time_used/60.0);



  SQP_destroy(sqp);
  gzclose(ffq);
  gzclose(ffqw);
  gzclose(rfq);
  gzclose(rfqw);
  if(mfqw != NULL)
    gzclose(mfqw);
  if(ppaw != NULL)
    gzclose(ppaw);
  if(dffqw != NULL)
    gzclose(dffqw);
  if(drfqw != NULL)
    gzclose(drfqw);
  return 0;
}