예제 #1
0
/**
 * Merges the result of an alignment performed to the right of a seed
 * into that seed.
 *
 * When the alignment reports at least one match, the seed's mismatch and
 * gap counters are increased by the alignment's counters, the seed's read
 * and genome end coordinates are advanced by the aligned lengths, and the
 * alignment's CIGAR is combined with the seed's CIGAR through
 * cigar_concat(). Otherwise the seed is left untouched.
 *
 * @param alig_out  alignment result to merge (read; passed to cigar_concat)
 * @param seed      seed to be extended in place
 * @param cal       not used by this function
 */
void update_seed_right(alig_out_t *alig_out, seed_t *seed, seed_cal_t *cal) {
  (void) cal;

  // an alignment without matches contributes nothing to the seed
  const int has_matches = alig_out->match > 0;
  if (!has_matches) {
    return;
  }

  // accumulate the error counters reported by the alignment
  seed->num_mismatches  += alig_out->mismatch;
  seed->num_open_gaps   += alig_out->gap_open;
  seed->num_extend_gaps += alig_out->gap_extend;

  // push the end of the seed forward, both on the read and on the genome
  seed->read_end   += alig_out->map_len1;
  seed->genome_end += alig_out->map_len2;

  // combine the alignment cigar with the seed cigar
  // (presumably cigar_concat appends its first argument to its second;
  // confirm against the cigar_concat definition)
  if (alig_out->cigar.num_ops > 0) {
    cigar_concat(&alig_out->cigar, &seed->cigar);
  }
}
예제 #2
0
/**
 * Merges the result of an alignment performed to the left of a seed
 * into that seed.
 *
 * When the alignment reports at least one match, the seed's mismatch and
 * gap counters are increased by the alignment's counters and the seed's
 * read and genome start coordinates are moved back by the aligned
 * lengths. If the alignment carries CIGAR operations, the seed CIGAR is
 * rebuilt from the alignment CIGAR followed by the previous seed CIGAR.
 * Otherwise the seed is left untouched.
 *
 * Note that alig_out->cigar is modified by this call: it is used as the
 * accumulator into which the seed CIGAR is concatenated.
 *
 * @param alig_out  alignment result to merge; its cigar is modified
 * @param seed      seed to be extended in place
 * @param cal       not used by this function
 */
void update_seed_left(alig_out_t *alig_out, seed_t *seed, seed_cal_t *cal) {
  (void) cal;

  if (alig_out->match > 0) {

    // update seed
    seed->num_mismatches += alig_out->mismatch;
    seed->num_open_gaps += alig_out->gap_open;
    seed->num_extend_gaps += alig_out->gap_extend;

    // the seed grows towards lower coordinates
    seed->read_start -= alig_out->map_len1;
    seed->genome_start -= alig_out->map_len2;

    // update cigar with the sw output
    if (alig_out->cigar.num_ops > 0) {
      // The alignment operations must come before the seed operations, so
      // the seed cigar is concatenated into the alignment cigar first and
      // the combined result is then copied back over a re-initialised
      // seed cigar. (Argument order of cigar_concat/cigar_copy is taken
      // from their usage here; confirm against their definitions.)
      //
      // A scratch cigar_t that was previously initialised here and never
      // used has been removed.
      cigar_concat(&seed->cigar, &alig_out->cigar);
      cigar_init(&seed->cigar);
      cigar_copy(&seed->cigar, &alig_out->cigar);
    }
  }
}
예제 #3
0
int sa_sam_writer(void *data) {
  sa_wf_batch_t *wf_batch = (sa_wf_batch_t *) data;
  
  sa_mapping_batch_t *mapping_batch = (sa_mapping_batch_t *) wf_batch->mapping_batch;
  if (mapping_batch == NULL) {
    printf("bam_writer1: error, NULL mapping batch\n");
    return 0;
  }
  /*
  for (int i = 0; i < NUM_COUNTERS; i++) {
    counters[i] += mapping_batch->counters[i];
  }
  */
  #ifdef _TIMING
  for (int i = 0; i < NUM_TIMING; i++) {
    func_times[i] += mapping_batch->func_times[i];
  }
  #endif

  int num_mismatches, num_cigar_ops;
  size_t flag, pnext = 0, tlen = 0;
  char *cigar_string, *cigar_M_string, *rnext = "*";

  fastq_read_t *read;
  array_list_t *read_list = mapping_batch->fq_reads;

  array_list_t *mapping_list, *mate_list;
  FILE *out_file = (FILE *) wf_batch->writer_input->bam_file;

  sa_genome3_t *genome = wf_batch->sa_index->genome;

  size_t num_reads, num_mappings, num_mate_mappings;
  num_reads = mapping_batch->num_reads;

  if (mapping_batch->options->pair_mode != SINGLE_END_MODE) {
    // PAIR MODE
    int len;
    char *sequence, *quality;

    char *seq, *opt_fields;
    alignment_t *alig;
  
    for (size_t i = 0; i < num_reads; i++) {
      read = (fastq_read_t *) array_list_get(i, read_list);
      //      seq = read->sequence;
      /*
      if (i % 2 == 0)  {
	mate_list = mapping_batch->mapping_lists[i+1];
	num_mate_mappings = array_list_size(mate_list);
      } else {
	mate_list = mapping_list;
	num_mate_mappings = num_mappings;
      }
      */
      mapping_list = mapping_batch->mapping_lists[i];
      num_mappings = array_list_size(mapping_list);
      num_total_mappings += num_mappings;

      #ifdef _VERBOSE
      if (num_mappings > 1) {
	num_dup_reads++;
	num_total_dup_reads += num_mappings;
      }
      #endif
      
      if (num_mappings > 0) {
	num_mapped_reads++;
	if (num_mappings > 1) {
	  num_multihit_reads++;
	}
	for (size_t j = 0; j < num_mappings; j++) {
	  alig = (alignment_t *) array_list_get(j, mapping_list);
	  /*
	  // update alignment
	  alig->secondary_alignment = 0;
	  if (num_mate_mappings != 1) {
	    alig->is_mate_mapped = 0;
	    alig->is_paired_end_mapped = 0;
	    alig->mate_strand = 0;
	  }
	  */
	  if (alig->optional_fields) {
	    opt_fields = (char *) calloc(strlen(alig->optional_fields) + 100, sizeof(char));
	    sprintf(opt_fields, "NH:i:%i\t%s", num_mappings, alig->optional_fields);
	    //	    sprintf(opt_fields, "NH:i:%i\t%s\tXU:i:%i", num_mappings, alig->optional_fields, mapping_batch->status[i]);
	  } else {
	    opt_fields = (char *) calloc(100, sizeof(char));
	    sprintf(opt_fields, "NH:i:%i", num_mappings);
	    //	    sprintf(opt_fields, "NH:i:%i\tXU:i:%i", num_mappings, mapping_batch->status[i]);
	  }
	  /*
	  // update alignment
	  alig->secondary_alignment = 0;
	  if (num_mate_mappings != 1) {
	    alig->is_mate_mapped = 0;
	    alig->is_paired_end_mapped = 0;
	    alig->mate_strand = 0;
	  }
	  */
	  flag = 0;
	  if (alig->is_paired_end)                              flag += BAM_FPAIRED;
	  if (alig->is_paired_end_mapped)                       flag += BAM_FPROPER_PAIR;
	  if (!alig->is_seq_mapped)                             flag += BAM_FUNMAP;   
	  if ((!alig->is_mate_mapped) && (alig->is_paired_end)) flag += BAM_FMUNMAP;
	  if (alig->mate_strand)                                flag += BAM_FMREVERSE;
	  if (alig->pair_num == 1)	                        flag += BAM_FREAD1;
	  if (alig->pair_num == 2)                              flag += BAM_FREAD2;
	  if (alig->secondary_alignment)                        flag += BAM_FSECONDARY;
	  if (alig->fails_quality_check)                        flag += BAM_FQCFAIL;
	  if (alig->pc_optical_duplicate)                       flag += BAM_FDUP;
	  if (alig->seq_strand)                                 flag += BAM_FREVERSE;

	  fprintf(out_file, "%s\t%lu\t%s\t%i\t%i\t%s\t%s\t%i\t%i\t%s\t%s\t%s\n", 
		  read->id,
		  flag,
		  genome->chrom_names[alig->chromosome],
		  alig->position + 1,
		  (num_mappings > 1 ? 0 : alig->mapq), //60, //(alig->map_quality > 3 ? 0 : alig->map_quality),
		  alig->cigar,
		  (alig->chromosome == alig->mate_chromosome ? "=" : genome->chrom_names[alig->mate_chromosome]),
		  alig->mate_position + 1,
		  alig->template_length,
		  alig->sequence,
		  alig->quality,
		  opt_fields
		  );

	  // free memory
	  free(opt_fields);
	  alignment_free(alig);	 
	} // end for num_mappings
      } else {
	num_unmapped_reads++;

	opt_fields = (char *) calloc(100, sizeof(char));
	sprintf(opt_fields, "XM:i:%i XU:i:%i", num_mappings, mapping_batch->status[i]);

	if (read->adapter) {
	  len = read->length + abs(read->adapter_length);
	  sequence = (char *) malloc(len + 1);
	  quality = (char *) malloc(len + 1);

	  if (read->adapter_length < 0) {
	    strcpy(quality, read->adapter_quality);
	    strcat(quality, read->quality);
	  } else {
	    strcpy(quality, read->quality);
	    strcat(quality, read->adapter_quality);
	  }
	  
	  if ((read->adapter_strand == 0 && read->adapter_length < 0) || 
	      (read->adapter_strand == 1 && read->adapter_length > 0)) {
	    strcpy(sequence, read->adapter);
	    strcat(sequence, read->sequence);
	  } else {
	    strcpy(sequence, read->sequence);
	    strcat(sequence, read->adapter);
	  }

	  sequence[len] = 0; 
	  quality[len] = 0; 
	} else {
	  sequence = read->sequence;
	  quality = read->quality;
	}

	fprintf(out_file, "%s\t4\t*\t0\t0\t*\t*\t0\t0\t%s\t%s\t%s\n", 
		read->id,
		sequence,
		quality,
		opt_fields
		);

	free(opt_fields);

	if (read->adapter) {
	  free(sequence);
	  free(quality);
	}
      }
      array_list_free(mapping_list, (void *) NULL);
    }
  } else {
    // SINGLE MODE
    int len, mapq;
    char *seq;
    seed_cal_t *cal;

    cigar_t *cigar;
    char *sequence, *revcomp, *quality;

    for (size_t i = 0; i < num_reads; i++) {
      read = (fastq_read_t *) array_list_get(i, read_list);
      mapping_list = mapping_batch->mapping_lists[i];
      num_mappings = array_list_size(mapping_list);
      num_total_mappings += num_mappings;

      #ifdef _VERBOSE
      if (num_mappings > 1) {
	num_dup_reads++;
	num_total_dup_reads += num_mappings;
      }
      #endif
      
      if (num_mappings > 0) {
	num_mapped_reads++;
	if (num_mappings > 1) {
	  num_multihit_reads++;
	}

	for (size_t j = 0; j < num_mappings; j++) {
	  cal = (seed_cal_t *) array_list_get(j, mapping_list);
	  
	  if (read->adapter) {
	    // sequences and cigar
	    len = read->length + abs(read->adapter_length);
	    sequence = (char *) malloc(len + 1);
	    revcomp = (char *) malloc(len + 1);
	    quality = (char *) malloc(len + 1);
	    cigar = cigar_new_empty();

	    if (read->adapter_length < 0) {
	      strcpy(quality, read->adapter_quality);
	      strcat(quality, read->quality);
	    } else {
	      strcpy(quality, read->quality);
	      strcat(quality, read->adapter_quality);
	    }
	    
	    if ( (cal->strand == 1 && 
		  ((read->adapter_strand == 0 && read->adapter_length > 0) || 
		   (read->adapter_strand == 1 && read->adapter_length < 0)))
		 ||
		 (cal->strand == 0 && 
		  ((read->adapter_strand == 0 && read->adapter_length < 0) ||
		   (read->adapter_strand == 1 && read->adapter_length > 0))) ) {
	      strcpy(sequence, read->adapter);
	      strcat(sequence, read->sequence);
	      strcpy(revcomp, read->adapter_revcomp);
	      strcat(revcomp, read->revcomp);
	      
	      cigar_append_op(abs(read->adapter_length), 'S', cigar);
	      cigar_concat(&cal->cigar, cigar);
	    } else {
	      strcpy(sequence, read->sequence);
	      strcat(sequence, read->adapter);
	      strcpy(revcomp, read->revcomp);
	      strcat(revcomp, read->adapter_revcomp);
	      
	      cigar_concat(&cal->cigar, cigar);
	      cigar_append_op(read->adapter_length, 'S', cigar);
	    }
	    sequence[len] = 0; 
	    revcomp[len] = 0; 
	    quality[len] = 0; 
	  } else {
	    // sequences and cigar
	    sequence = read->sequence;
	    revcomp = read->revcomp;
	    quality = read->quality;
	    cigar = &cal->cigar;
	  }

	  if (cal->strand) {
	    flag = 16;
	    seq = revcomp;
	  } else {
	    flag = 0;
	    seq = sequence;
	  }

	  /*
	  if (i == 0) {
	    flag += BAM_FSECONDARY;
	  }
	  */

	  cigar_string = cigar_to_string(cigar);
	  cigar_M_string = cigar_to_M_string(&num_mismatches, &num_cigar_ops, cigar);
	  if (num_mappings > 1) {
	    cal->mapq = 0;
	  }
	  fprintf(out_file, "%s\t%i\t%s\t%i\t%i\t%s\t%s\t%lu\t%i\t%s\t%s\tNH:i:%i\tNM:i:%i\n", 
		  read->id,
		  flag,
		  genome->chrom_names[cal->chromosome_id],
		  cal->start + 1,
		  (num_mappings == 1 ? cal->mapq : 0),
		  cigar_M_string,
		  rnext,
		  pnext,
		  tlen,
		  seq,
		  quality,
		  num_mappings,
		  num_mismatches
		  );

	  // free memory
	  free(cigar_M_string);
	  free(cigar_string);
	  seed_cal_free(cal);	 
	  if (read->adapter) {
	    free(sequence);
	    free(revcomp);
	    free(quality);
	    cigar_free(cigar);
	  }
	}
      } else {
	num_unmapped_reads++;

	if (read->adapter) {
	  // sequences and cigar
	  len = read->length + abs(read->adapter_length);
	  sequence = (char *) malloc(len + 1);
	  quality = (char *) malloc(len + 1);

	  if (read->adapter_length < 0) {
	    strcpy(quality, read->adapter_quality);
	    strcat(quality, read->quality);
	  } else {
	    strcpy(quality, read->quality);
	    strcat(quality, read->adapter_quality);
	  }
	  
	  if ((read->adapter_strand == 0 && read->adapter_length < 0) || 
	      (read->adapter_strand == 1 && read->adapter_length > 0)) {
	    strcpy(sequence, read->adapter);
	    strcat(sequence, read->sequence);
	  } else {
	    strcpy(sequence, read->sequence);
	    strcat(sequence, read->adapter);
	  }

	  sequence[len] = 0; 
	  quality[len] = 0; 
	} else {
	  // sequences
	  sequence = read->sequence;
	  quality = read->quality;
	}
	
	fprintf(out_file, "%s\t4\t*\t0\t0\t*\t*\t0\t0\t%s\t%s\n", 
		read->id,
		sequence,
		quality
		);

	if (read->adapter) {
	  free(sequence);
	  free(quality);
	}
      }
      
      array_list_free(mapping_list, (void *) NULL);
    } // end for num_reads
  }

  // free memory
  sa_mapping_batch_free(mapping_batch);

  if (wf_batch) sa_wf_batch_free(wf_batch);

  return 0;
}