// Equality: two alignments are equal exactly when their alignment
// descriptions compare equal.
WT_Boolean WT_Alignment::operator== (WT_Alignment const & alignment) const
{
    return (get_alignment_description() == alignment.get_alignment_description())
        ? WD_True
        : WD_False;
}
/****************************************************************************
 * Build an alignment that keeps only the sequences whose IDs are in a
 * given list.
 *
 * The kept sequences appear in the order in which their IDs occur in
 * seqs_to_keep.
 *
 * N.B. It is NOT an error for the given list to contain sequence IDs that
 * are not in the alignment; such IDs are simply skipped.
 *
 * seqs_to_keep  IDs of the sequences to retain.
 * alignment     The alignment to take sequences, name, description and
 *               consensus string from.
 *
 * Returns the alignment produced by allocate_alignment().
 ****************************************************************************/
ALIGNMENT_T* remove_alignment_seqs
  (STRING_LIST_T* seqs_to_keep,
   ALIGNMENT_T*   alignment)
{
  // Extract the names of the sequences in the alignment.
  STRING_LIST_T* alignment_species = get_species_names(alignment);
  int num_species = get_num_strings(alignment_species);

  // Report every sequence of the alignment that is about to be dropped.
  int i_species;
  if (verbosity >= NORMAL_VERBOSE) {
    for (i_species = 0; i_species < num_species; i_species++) {
      char* this_species = get_nth_string(i_species, alignment_species);
      if (!have_string(this_species, seqs_to_keep)) {
        fprintf(stderr, "Removing %s from alignment.\n", this_species);
      }
    }
  }

  // Allocate space for the new sequences.  The copy loop below walks
  // seqs_to_keep, so the length of that list is the true upper bound on the
  // number of slots written.  Sizing the array from a separate pass over the
  // alignment names is unsafe: a repeated ID in seqs_to_keep would write
  // past the end of the array.
  int num_requested = get_num_strings(seqs_to_keep);
  SEQ_T** new_sequences = (SEQ_T**)mm_malloc(num_requested * sizeof(SEQ_T*));

  // Copy the sequences, counting how many were actually stored.
  int num_final = 0;
  for (i_species = 0; i_species < num_requested; i_species++) {
    char* this_species = get_nth_string(i_species, seqs_to_keep);

    // If the requested ID is in the alignment, then copy over the sequence.
    if (have_string(this_species, alignment_species)) {
      SEQ_T* this_seq = get_alignment_sequence_by_name(this_species, alignment);
      new_sequences[num_final] = copy_sequence(this_seq);
      num_final++;
    }
  }

  // Allocate and return the new alignment.  num_final is the number of
  // slots filled above, so allocate_alignment() never sees an unset entry.
  // NOTE(review): new_sequences, consensus and alignment_species are not
  // released here; whether allocate_alignment() takes ownership of its
  // arguments is not visible from this function -- confirm before freeing.
  char *consensus = NULL;
  copy_string(&consensus, get_consensus_string(alignment));
  ALIGNMENT_T* new_alignment =
    allocate_alignment(get_alignment_name(alignment),
                       get_alignment_description(alignment),
                       num_final,
                       new_sequences,
                       consensus);
  return(new_alignment);
}
/****************************************************************************
 * Build a new alignment from a window of columns of an existing alignment.
 *
 * start      Offset of the first column of the window within each raw
 *            sequence and within the consensus string.
 * width      Number of columns to take.
 * alignment  The alignment the window is taken from.
 *
 * Returns the alignment produced by allocate_alignment(); it carries the
 * name and description of the input alignment.
 *
 * NOTE(review): start and width are used as given, with no range check
 * against the sequence or consensus length -- callers presumably guarantee
 * a valid window; confirm.
 ****************************************************************************/
ALIGNMENT_T* extract_subalignment
  (int          start,
   int          width,
   ALIGNMENT_T* alignment)
{
  int seq_count = get_num_aligned_sequences(alignment);
  SEQ_T** source_seqs = get_alignment_sequences(alignment);
  SEQ_T** window_seqs = (SEQ_T**)mm_malloc(seq_count * sizeof(SEQ_T*));

  // One scratch buffer holds the current window; the same buffer is handed
  // to allocate_seq() on every iteration.
  char* window = (char*)mm_malloc((width + 1) * sizeof(char));
  int idx;
  for (idx = 0; idx < seq_count; idx++) {
    SEQ_T* source = source_seqs[idx];
    char* full_text = get_raw_sequence(source);

    strncpy(window, full_text + start, width);
    window[width] = '\0';

    window_seqs[idx] = allocate_seq(get_seq_name(source),
                                    get_seq_description(source),
                                    get_seq_offset(source),
                                    window);
  }

  // Cut the same window out of the consensus string.
  char* full_consensus = get_consensus_string(alignment);
  char* window_consensus = (char*)mm_malloc(sizeof(char) * (width + 1));
  strncpy(window_consensus, full_consensus + start, width);
  window_consensus[width] = '\0';

  // Assemble the result.
  ALIGNMENT_T* result = allocate_alignment(get_alignment_name(alignment),
                                           get_alignment_description(alignment),
                                           seq_count,
                                           window_seqs,
                                           window_consensus);

  // Release the temporary sequence objects, their array, and the scratch
  // buffer.  The consensus window is not released here.
  for (idx = 0; idx < seq_count; idx++) {
    free_seq(window_seqs[idx]);
  }
  myfree(window_seqs);
  myfree(window);

  return(result);
}
// Writes this alignment to the file.
//
// Any delayed drawable is dumped first.  If the file's heuristics allow
// binary data, the alignment is written as '{', a WT_Integer32 byte count,
// the WD_EXBO_ALIGNMENT opcode and the alignment description (each as a
// WT_Unsigned_Integer16), and a closing '}'.  Otherwise it is written as
// "(Alignment <quoted name>)" after the geometry tab level.
//
// Returns WT_Result::Success on success, WT_Result::Internal_Error when the
// alignment description matches none of the known values in the ASCII path,
// or whatever failing result WD_CHECK propagates from a write.
WT_Result WT_Alignment::serialize(WT_File & file) const
{
    WD_CHECK (file.dump_delayed_drawable());

    if (!file.heuristics().allow_binary_data())
    {
        // Extended ASCII format
        WD_CHECK (file.write_geom_tab_level());
        WD_CHECK (file.write("(Alignment "));

        // The emitted names are reproduced exactly as before: note that
        // Align_Center is written without the explicit WD_True argument and
        // "Align_Bottom_Right" carries no trailing blank.
        switch (get_alignment_description())
        {
        case WT_Alignment::Align_Center:
            {
                WD_CHECK (file.write_quoted_string("Align_Center "));
            }
            break;
        case WT_Alignment::Align_Title_Block:
            {
                WD_CHECK (file.write_quoted_string("Align_Title_Block ", WD_True));
            }
            break;
        case WT_Alignment::Align_Top:
            {
                WD_CHECK (file.write_quoted_string("Align_Top ", WD_True));
            }
            break;
        case WT_Alignment::Align_Bottom:
            {
                WD_CHECK (file.write_quoted_string("Align_Bottom ", WD_True));
            }
            break;
        case WT_Alignment::Align_Left:
            {
                WD_CHECK (file.write_quoted_string("Align_Left ", WD_True));
            }
            break;
        case WT_Alignment::Align_Right:
            {
                WD_CHECK (file.write_quoted_string("Align_Right ", WD_True));
            }
            break;
        case WT_Alignment::Align_Top_Left:
            {
                WD_CHECK (file.write_quoted_string("Align_Top_Left ", WD_True));
            }
            break;
        case WT_Alignment::Align_Top_Right:
            {
                WD_CHECK (file.write_quoted_string("Align_Top_Right ", WD_True));
            }
            break;
        case WT_Alignment::Align_Bottom_Left:
            {
                WD_CHECK (file.write_quoted_string("Align_Bottom_Left ", WD_True));
            }
            break;
        case WT_Alignment::Align_Bottom_Right:
            {
                WD_CHECK (file.write_quoted_string("Align_Bottom_Right", WD_True));
            }
            break;
        case WT_Alignment::Align_None:
            {
                WD_CHECK (file.write_quoted_string("Align_None ", WD_True));
            }
            break;
        default:
            return WT_Result::Internal_Error;
        }

        WD_CHECK (file.write(")"));
        return WT_Result::Success;
    }

    // Binary format.
    WD_CHECK (file.write((WT_Byte) '{'));
    WD_CHECK (file.write((WT_Integer32) (sizeof(WT_Unsigned_Integer16) + // for the opcode
                                         sizeof(WT_Unsigned_Integer16) + // alignment description
                                         sizeof(WT_Byte)                 // the closing "}"
                                        )));
    WD_CHECK (file.write((WT_Unsigned_Integer16) WD_EXBO_ALIGNMENT));
    WD_CHECK (file.write((WT_Unsigned_Integer16) get_alignment_description()));
    WD_CHECK (file.write((WT_Byte) '}'));

    return WT_Result::Success;
}
/****************************************************************************
 * Remove from the alignment all columns that contain gaps for the
 * specified species.
 *
 * A column counts as a gap when the specified species has '-' or '.' in it.
 * All other columns are copied, for every sequence and for the consensus
 * string, into a new alignment.
 *
 * species    Name of the sequence whose gaps select the columns to drop.
 *            die() is called if it is not found among the alignment's
 *            species names.
 * alignment  The alignment to read from.
 *
 * Returns the alignment produced by allocate_alignment(); it carries the
 * name and description of the input alignment.
 ****************************************************************************/
ALIGNMENT_T* remove_alignment_gaps
  (char*        species,
   ALIGNMENT_T* alignment)
{
  // Locate this species in the alignment.
  int species_index = get_index_in_string_list(species,
                                               get_species_names(alignment));
  if (species_index == -1) {
    die("Can't find %s in alignment.\n", species);
  }
  SEQ_T* this_seq = get_alignment_sequence(species_index, alignment);

  // Get the dimensions of the original matrix.
  int num_sequences = get_num_aligned_sequences(alignment);
  int alignment_length = get_alignment_length(alignment);

  // Allocate memory for raw sequences that will constitute the new alignment.
  // Each buffer holds at most alignment_length characters plus a terminator;
  // the zero fill from mm_calloc supplies the terminator wherever copying
  // stops.  The element size is sizeof(char): these are character buffers,
  // not pointer arrays.
  char** raw_sequences = (char**)mm_malloc(sizeof(char*) * num_sequences);
  int i_seq = 0;
  for (i_seq = 0; i_seq < num_sequences; i_seq++) {
    raw_sequences[i_seq] = (char*)mm_calloc(alignment_length + 1, sizeof(char));
  }
  char* consensus = get_consensus_string(alignment);
  char* new_consensus = (char*)mm_calloc(alignment_length + 1, sizeof(char));

  // Iterate over all columns.
  int i_column;
  int i_raw = 0;   // Next free column in the output buffers.
  for (i_column = 0; i_column < alignment_length; i_column++) {

    // Is there a gap?
    char reference_char = get_seq_char(i_column, this_seq);
    if ((reference_char == '-') || (reference_char == '.')) {
      continue;
    }

    // If no gap, then copy over this column.
    for (i_seq = 0; i_seq < num_sequences; i_seq++) {
      SEQ_T* this_sequence = get_alignment_sequence(i_seq, alignment);
      raw_sequences[i_seq][i_raw] = get_seq_char(i_column, this_sequence);
    }
    new_consensus[i_raw] = consensus[i_column];
    i_raw++;
  }

  // Create new sequence objects.
  SEQ_T** new_sequences = (SEQ_T**)mm_malloc(num_sequences * sizeof(SEQ_T*));
  for (i_seq = 0; i_seq < num_sequences; i_seq++) {
    SEQ_T* this_sequence = get_alignment_sequence(i_seq, alignment);
    new_sequences[i_seq] = allocate_seq(get_seq_name(this_sequence),
                                        get_seq_description(this_sequence),
                                        get_seq_offset(this_sequence),
                                        raw_sequences[i_seq]);
  }

  // Allocate and return the new alignment.
  ALIGNMENT_T* new_alignment =
    allocate_alignment(get_alignment_name(alignment),
                       get_alignment_description(alignment),
                       num_sequences,
                       new_sequences,
                       new_consensus);

  // Free local dynamic memory.
  for (i_seq = 0; i_seq < num_sequences; i_seq++) {
    myfree(raw_sequences[i_seq]);
    free_seq(new_sequences[i_seq]);
  }
  myfree(raw_sequences);
  myfree(new_sequences);
  myfree(new_consensus);

  return(new_alignment);
}