int main (int argc, char * argv[]) { // Parse the command line. if (argc != 4) { fprintf(stderr, "Usage: reduce-alignment <start> <width> <alignment>\n"); exit(1); } int start_position = atoi(argv[1]); int width = atoi(argv[2]); char* alignment_filename = argv[3]; // Read the alignment. ALIGNMENT_T* big_alignment = read_alignment_from_file( alignment_filename, FALSE, FALSE, NULL // pointer to ref_seq_index, not used. ); fprintf(stderr, "Read alignment of %d sequences and %d columns.\n", get_num_aligned_sequences(big_alignment), get_alignment_length(big_alignment)); if (start_position + width > get_alignment_length(big_alignment)) { fprintf(stderr, "Invalid coordinates: %d + %d > %d.\n", start_position, width, get_alignment_length(big_alignment)); exit(1); } // Extract the smaller alignment. ALIGNMENT_T* small_alignment = extract_subalignment(start_position, width, big_alignment); fprintf(stderr, "Created alignment of %d sequences and %d columns.\n", get_num_aligned_sequences(small_alignment), get_alignment_length(small_alignment)); // Print the alignment. print_clustalw(stdout, FALSE, small_alignment); // Free locally allocated memory. free_alignment(big_alignment); free_alignment(small_alignment); return(0); }
/**************************************************************************** * Return a list containing the empirical column frequency distributions * for all alignments in the input. * * Each file in the list of filenames is read and the species list is * determined. The counts of each occurring column are tallied. * All files with the same species lists get their counts combined. * * The returned list contains one distribution per species list that * occurs in some alignment. ****************************************************************************/ OBJECT_LIST_T* get_alignment_column_freqs_list (ALPH_T alph, STRING_LIST_T* filenames, BOOLEAN_T remove_allgap_seqs) { int file_index; int num_filenames = get_num_strings(filenames); ARRAY_T* alignment_column_freqs = NULL; OBJECT_LIST_T* alignment_column_freqs_list = new_object_list(equal_string_lists, (void*)copy_string_list, free_string_list, free_array); // Consider each alignment in turn. for(file_index = 0; file_index < num_filenames; file_index++) { char* filename = get_nth_string(file_index, filenames); if (verbosity >= NORMAL_VERBOSE && !(file_index % 1)) { fprintf( stderr, "Computing column freqs: alignment file number %d of %d total files.\n", file_index+1, num_filenames ); } // Read the alignment int ref_seq_index = 0; ALIGNMENT_T* alignment = read_alignment_from_file(filename, TRUE, remove_allgap_seqs, &ref_seq_index); STRING_LIST_T* alignment_species = get_species_names(alignment); // Try to retrieve the counts so far for this list of species. alignment_column_freqs = (ARRAY_T*)retrieve_object( alignment_species, alignment_column_freqs_list ); // Found counts for current species list? if (alignment_column_freqs) { // Add counts from current alignment. (void) build_alignment_column_counts(alph, alignment, alignment_column_freqs); // Note: objects in lists are references, so no need to re-store // after modification. } // Didn't find counts for this species list, so create new array of counts. else { alignment_column_freqs = build_alignment_column_counts(alph, alignment, NULL); store_object( (void*)alignment_column_freqs, (void*)alignment_species, 0.0, // Score alignment_column_freqs_list ); } // free space used by alignment free_alignment(alignment); } // each filename fprintf(stderr, "\n"); // Convert counts to frequencies by retrieving each array of counts // and dividing by the total counts for that list of species. while ( (alignment_column_freqs = retrieve_next_object(alignment_column_freqs_list) ) != NULL ) { int i; int num_freqs = get_array_length(alignment_column_freqs); double total_counts; // Get total counts. for (i=total_counts=0; i<num_freqs; i++) { total_counts += get_array_item(i, alignment_column_freqs); } // Get frequencies. for (i=0; i<num_freqs; i++) { double f = get_array_item(i, alignment_column_freqs); set_array_item(i, f/total_counts, alignment_column_freqs); #ifdef DEBUG int asize = alph_size(alph, ALPH_SIZE); int num_leaves = NINT(log(num_freqs)/log(asize)); char* alignment_col = mm_malloc((num_leaves + 1) * sizeof(char)); unhash_alignment_col( alph, i, //col_index alignment_col, num_leaves ); printf("%s %g %g\n", alignment_col, f, f/total_counts); myfree(alignment_col); #endif } // get frequencies } // while more species lists return(alignment_column_freqs_list); } // get_alignment_column_freqs_list