Example #1
0
// Two alignments compare equal exactly when their alignment descriptions
// compare equal; nothing else is consulted.
WT_Boolean WT_Alignment::operator== (WT_Alignment const & alignment) const
{
    return (get_alignment_description() == alignment.get_alignment_description())
               ? WD_True
               : WD_False;
}
Example #2
0
/****************************************************************************
 * Remove from an alignment any sequence whose ID is not in a given list.
 *
 * The input alignment is left untouched; the result is built by
 * allocate_alignment() from copies of the retained sequences, which are
 * handed over in the order their IDs occur in seqs_to_keep.
 *
 * N.B. It is NOT an error for the given list to contain sequence IDs that 
 * are not in the alignment.
 ****************************************************************************/
ALIGNMENT_T* remove_alignment_seqs
  (STRING_LIST_T* seqs_to_keep,
   ALIGNMENT_T*   alignment)
{
  // Extract the names of the sequences in the alignment.
  // NOTE(review): ownership of the returned list is not visible from here;
  // if get_species_names() allocates a fresh list it should be freed below.
  STRING_LIST_T* alignment_species = get_species_names(alignment);
  int num_species = get_num_strings(alignment_species);

  // Report every sequence that is about to be dropped.
  int i_species;
  for (i_species = 0; i_species < num_species; i_species++) {
    char* this_species = get_nth_string(i_species, alignment_species);

    if (!have_string(this_species, seqs_to_keep)
        && (verbosity >= NORMAL_VERBOSE)) {
      fprintf(stderr, "Removing %s from alignment.\n", this_species);
    }
  }

  // Allocate space for the new sequences.  The copy loop below adds at most
  // one sequence per requested ID, so the size of the request list is a safe
  // upper bound even when that list (or the alignment) contains duplicate
  // IDs.  Sizing the array from a count over the alignment's names, as was
  // done previously, could be overrun in that case.
  int num_requested = get_num_strings(seqs_to_keep);
  SEQ_T** new_sequences
    = (SEQ_T**)mm_malloc(num_requested * sizeof(SEQ_T*));

  // Copy the sequences, counting how many were actually kept.
  int num_final = 0;
  int i_requested;
  for (i_requested = 0; i_requested < num_requested; i_requested++) {
    char* this_species = get_nth_string(i_requested, seqs_to_keep);

    // If the requested ID is in the alignment, then copy over the sequence.
    if (have_string(this_species, alignment_species)) {
      SEQ_T* this_seq
        = get_alignment_sequence_by_name(this_species, alignment);
      new_sequences[num_final] = copy_sequence(this_seq);
      num_final++;
    }
  }

  // Allocate the new alignment.
  // NOTE(review): the consensus is copied and not released here.  Whether
  // allocate_alignment() takes ownership of the consensus string is unclear
  // (remove_alignment_gaps frees its consensus afterwards,
  // extract_subalignment does not), so this is deliberately left unchanged.
  char *consensus = NULL;
  copy_string(&consensus, get_consensus_string(alignment));
  ALIGNMENT_T* new_alignment
    = allocate_alignment(get_alignment_name(alignment),
                         get_alignment_description(alignment),
                         num_final,
                         new_sequences,
                         consensus);

  // Free local dynamic memory, mirroring the cleanup performed by
  // extract_subalignment and remove_alignment_gaps after they call
  // allocate_alignment().
  // NOTE(review): this assumes allocate_alignment() keeps its own copies of
  // the sequences, as that sibling cleanup implies.
  int i_final;
  for (i_final = 0; i_final < num_final; i_final++) {
    free_seq(new_sequences[i_final]);
  }
  myfree(new_sequences);

  return(new_alignment);
}
Example #3
0
/****************************************************************************
 * Extract a small alignment out of the middle of a larger alignment.
 *
 * start is the zero-based index of the first column to keep and width is
 * the number of columns requested.  If fewer than width characters remain
 * after start, the extracted strings are simply shorter (strncpy stops at
 * the terminator).  The input alignment is not modified.
 *
 * Dies if start or width is negative, or if start lies beyond the end of
 * the alignment, since those values would address memory outside the
 * sequence buffers.
 ****************************************************************************/
ALIGNMENT_T* extract_subalignment
  (int start,
   int width,
   ALIGNMENT_T* alignment)
{
  // Reject ranges that cannot be addressed safely.
  // NOTE(review): assumes every raw sequence and the consensus string hold
  // at least get_alignment_length() characters -- confirm.
  int alignment_length = get_alignment_length(alignment);
  if ((start < 0) || (width < 0) || (start > alignment_length)) {
    die("Invalid subalignment range (start=%d, width=%d, length=%d).\n",
        start, width, alignment_length);
  }

  int num_sequences = get_num_aligned_sequences(alignment);
  SEQ_T** sequences = get_alignment_sequences(alignment);
  SEQ_T** subsequences = (SEQ_T**)mm_malloc(num_sequences * sizeof(SEQ_T*));

  // Extract the specified columns into a new list of sequences.  A single
  // scratch buffer is reused for every sequence; it is released below after
  // all sequence objects have been built from it.
  int i_seq = 0;
  char* subsequence = (char*)mm_malloc((width + 1) * sizeof(char));
  for (i_seq = 0; i_seq < num_sequences; i_seq++) {
    SEQ_T* this_seq = sequences[i_seq];
    char* raw_seq = get_raw_sequence(this_seq);
    strncpy(subsequence, raw_seq + start, width);
    subsequence[width] = '\0';  // strncpy does not terminate a full copy.
    subsequences[i_seq] = 
      allocate_seq(get_seq_name(this_seq),
                   get_seq_description(this_seq),
                   get_seq_offset(this_seq), 
                   subsequence);
  }

  // Extract the consensus string in the specified columns.
  char* consensus = get_consensus_string(alignment);
  char* subconsensus = (char*)mm_malloc(sizeof(char) * (width + 1));
  strncpy(subconsensus, consensus + start, width);
  subconsensus[width] = '\0';

  // Allocate and return the new alignment.
  ALIGNMENT_T* subalignment 
    = allocate_alignment(get_alignment_name(alignment),
                         get_alignment_description(alignment),
                         num_sequences,
                         subsequences,
                         subconsensus);

  // Free local dynamic memory.
  // NOTE(review): subconsensus is not released here although
  // remove_alignment_gaps frees its consensus buffer after
  // allocate_alignment(); confirm who owns it before changing this.
  for (i_seq = 0; i_seq < num_sequences; i_seq++) {
    free_seq(subsequences[i_seq]);
  }
  myfree(subsequences);
  myfree(subsequence);
  return(subalignment);
}
Example #4
0
// Writes this alignment to `file`, either as an extended-binary record or
// as an extended-ASCII "(Alignment ...)" record, depending on whether the
// file's heuristics allow binary data.
//
// Returns the first failing result of any write (via WD_CHECK),
// WT_Result::Internal_Error if the alignment description is not one of the
// known values (ASCII form only), and WT_Result::Success otherwise.
WT_Result WT_Alignment::serialize(WT_File & file) const
{
    WD_CHECK (file.dump_delayed_drawable());

    if (!file.heuristics().allow_binary_data())
    {
        // Extended ASCII format.
        WD_CHECK (file.write_geom_tab_level());
        WD_CHECK (file.write("(Alignment "));

        // Pick the label for the current description.  Every label is
        // padded with spaces to the same length.
        char const * label     = 0;
        bool         is_center = false;

        switch (get_alignment_description())
        {
        case WT_Alignment::Align_Center:
            label     = "Align_Center      ";
            is_center = true;
            break;
        case WT_Alignment::Align_Title_Block:
            label = "Align_Title_Block ";
            break;
        case WT_Alignment::Align_Top:
            label = "Align_Top         ";
            break;
        case WT_Alignment::Align_Bottom:
            label = "Align_Bottom      ";
            break;
        case WT_Alignment::Align_Left:
            label = "Align_Left        ";
            break;
        case WT_Alignment::Align_Right:
            label = "Align_Right       ";
            break;
        case WT_Alignment::Align_Top_Left:
            label = "Align_Top_Left    ";
            break;
        case WT_Alignment::Align_Top_Right:
            label = "Align_Top_Right   ";
            break;
        case WT_Alignment::Align_Bottom_Left:
            label = "Align_Bottom_Left ";
            break;
        case WT_Alignment::Align_Bottom_Right:
            label = "Align_Bottom_Right";
            break;
        case WT_Alignment::Align_None:
            label = "Align_None        ";
            break;
        default:
            // Unknown description: bail out after the opening token has
            // already been written.
            return WT_Result::Internal_Error;
        }

        // NOTE(review): Align_Center is the only value written without the
        // explicit WD_True second argument; that asymmetry is preserved
        // here -- confirm whether it is intentional.
        if (is_center)
        {
            WD_CHECK (file.write_quoted_string(label));
        }
        else
        {
            WD_CHECK (file.write_quoted_string(label, WD_True));
        }

        WD_CHECK (file.write(")"));
        return WT_Result::Success;
    }

    // Extended binary format: '{', byte count, opcode, description, '}'.
    // The byte count covers everything that follows it.
    WT_Integer32 const payload_size =
        (WT_Integer32) (sizeof(WT_Unsigned_Integer16) +   // the opcode
                        sizeof(WT_Unsigned_Integer16) +   // the description
                        sizeof(WT_Byte));                 // the closing "}"

    WD_CHECK (file.write((WT_Byte) '{'));
    WD_CHECK (file.write(payload_size));
    WD_CHECK (file.write((WT_Unsigned_Integer16) WD_EXBO_ALIGNMENT));
    WD_CHECK (file.write((WT_Unsigned_Integer16) get_alignment_description()));
    WD_CHECK (file.write((WT_Byte) '}'));

    return WT_Result::Success;
}
Example #5
0
/****************************************************************************
 * Remove from the alignment all columns that contain gaps for the
 * specified species.
 *
 * A column counts as a gap when the named species has '-' or '.' there.
 * The retained columns of every sequence, and of the consensus string, are
 * packed together and a new alignment is built from them; the input
 * alignment is not modified.  Dies if the species is not in the alignment.
 ****************************************************************************/
ALIGNMENT_T* remove_alignment_gaps
  (char*        species,
   ALIGNMENT_T* alignment)
{
  // Locate this species in the alignment.
  // NOTE(review): the list returned by get_species_names() is never
  // released here; confirm whether the caller owns it.
  int species_index = get_index_in_string_list(species, 
                                               get_species_names(alignment));
  if (species_index == -1) {
    die("Can't find %s in alignment.\n", species);
  }
  SEQ_T* this_seq = get_alignment_sequence(species_index, alignment);

  // Get the dimensions of the original matrix.
  int num_sequences = get_num_aligned_sequences(alignment);
  int alignment_length = get_alignment_length(alignment);

  // Allocate memory for raw sequences that will constitute the new alignment.
  // Each buffer holds characters, so the element size is sizeof(char); the
  // earlier sizeof(char*) over-allocated every buffer by the pointer width.
  // The buffers are never explicitly terminated below, so this relies on
  // mm_calloc zero-filling them (presumably like calloc -- confirm).
  char** raw_sequences = (char**)mm_malloc(sizeof(char*) * num_sequences);
  int i_seq = 0;
  for (i_seq = 0; i_seq < num_sequences; i_seq++) {
    raw_sequences[i_seq] 
      = (char*)mm_calloc(alignment_length + 1, sizeof(char));
  }
  char* consensus = get_consensus_string(alignment);
  char* new_consensus 
    = (char*)mm_calloc(alignment_length + 1, sizeof(char));

  // Iterate over all columns.  i_raw is the next free column in the output.
  int i_column;
  int i_raw = 0;
  for (i_column = 0; i_column < alignment_length; i_column++) {

    // Is there a gap in the reference species?
    char reference_char = get_seq_char(i_column, this_seq);
    if ((reference_char != '-') && (reference_char != '.')) {

      // If no gap, then copy over this column.
      for (i_seq = 0; i_seq < num_sequences; i_seq++) {
        SEQ_T* this_sequence = get_alignment_sequence(i_seq, alignment);
        raw_sequences[i_seq][i_raw] = get_seq_char(i_column, this_sequence);
      }
      new_consensus[i_raw] = consensus[i_column];
      i_raw++;
    }
  }

  // Create new sequence objects.
  SEQ_T** new_sequences = (SEQ_T**)mm_malloc(num_sequences * sizeof(SEQ_T*));
  for (i_seq = 0; i_seq < num_sequences; i_seq++) {
    SEQ_T* this_sequence = get_alignment_sequence(i_seq, alignment);
    new_sequences[i_seq] = allocate_seq(get_seq_name(this_sequence),
                                        get_seq_description(this_sequence),
                                        get_seq_offset(this_sequence),
                                        raw_sequences[i_seq]);
  }

  // Allocate and return the new alignment.
  ALIGNMENT_T* new_alignment
    = allocate_alignment(get_alignment_name(alignment),
                         get_alignment_description(alignment),
                         num_sequences,
                         new_sequences,
                         new_consensus);
  
  // Free local dynamic memory.
  for (i_seq = 0; i_seq < num_sequences; i_seq++) {
    myfree(raw_sequences[i_seq]);
    free_seq(new_sequences[i_seq]);
  }
  myfree(raw_sequences);
  myfree(new_sequences);
  myfree(new_consensus);

  return(new_alignment);
}