/* Test number_of_snps_detected_small: runs detect_snps() on small_alignment.aln and asserts that get_number_of_snps() equals 1. The END_TEST at the start of the next line closes the test that precedes this one. */
END_TEST START_TEST (number_of_snps_detected_small) { detect_snps("../tests/data/small_alignment.aln"); fail_unless( get_number_of_snps() == 1); }
/* Test number_of_snps_detected: runs detect_snps() on alignment_file_multiple_lines_per_sequence.aln and asserts that get_number_of_snps() equals 5. */
END_TEST START_TEST (number_of_snps_detected) { detect_snps("../tests/data/alignment_file_multiple_lines_per_sequence.aln"); fail_unless( get_number_of_snps() == 5); }
/* Test valid_number_of_sequences_in_file_with_multiple_lines_per_sequence: same input file; asserts that get_number_of_samples() equals 109. */
END_TEST START_TEST (valid_number_of_sequences_in_file_with_multiple_lines_per_sequence) { detect_snps("../tests/data/alignment_file_multiple_lines_per_sequence.aln"); fail_unless( get_number_of_samples() == 109 ); }
/* Test valid_genome_length_with_multiple_lines_per_sequence: same input file; asserts that get_length_of_genome() equals 2000. */
END_TEST START_TEST (valid_genome_length_with_multiple_lines_per_sequence) { detect_snps("../tests/data/alignment_file_multiple_lines_per_sequence.aln"); fail_unless( get_length_of_genome() == 2000 ); }
/* Test sample_names_from_alignment_file: runs detect_snps() on small_alignment.aln and asserts that the first three entries returned by get_sequence_names() are "reference_sequence", "comparison_sequence" and "another_comparison_sequence", in that order. */
/* NOTE(review): no END_TEST follows this test in the visible source - confirm that the test is closed before the next definition. */
END_TEST START_TEST (sample_names_from_alignment_file) { detect_snps("../tests/data/small_alignment.aln"); char ** current_sequence_names = get_sequence_names(); fail_unless(strcmp(current_sequence_names[0],"reference_sequence") == 0); fail_unless(strcmp(current_sequence_names[1],"comparison_sequence") == 0); fail_unless(strcmp(current_sequence_names[2],"another_comparison_sequence") == 0); }
/**
 * Compose an output path from a base name and an optional extension.
 *
 * Writes "<base><extension>" into destination, or just "<base>" when
 * append_extension is zero. The result is always NUL-terminated.
 *
 * @param destination       buffer receiving the path
 * @param destination_size  size of destination in bytes
 * @param base              base name of the output file
 * @param extension         extension to append, including the leading dot
 * @param append_extension  non-zero to append extension
 * @return 1 when the whole path fitted, 0 when it was truncated or
 *         formatting failed.
 */
static int compose_output_filename(char *destination, size_t destination_size,
                                   const char *base, const char *extension,
                                   int append_extension)
{
	int written = snprintf(destination, destination_size, "%s%s", base,
	                       append_extension ? extension : "");

	return written >= 0 && (size_t) written < destination_size;
}

/**
 * Detect the SNPs in an alignment file and write them out in the requested
 * formats (VCF, phylip, multi-FASTA).
 *
 * The multi-FASTA file is also written when no format at all was requested.
 * An extension (".vcf", ".phylip", ".snp_sites.aln") is appended to the
 * output name when more than one format is requested or when no explicit
 * output_filename was given; in the latter case the output name is derived
 * from filename with its directory stripped.
 *
 * @param filename                 path of the input alignment
 * @param output_multi_fasta_file  non-zero to write the multi-FASTA file
 * @param output_vcf_file          non-zero to write the VCF file
 * @param output_phylip_file       non-zero to write the phylip file
 * @param output_filename          explicit output name; NULL or "" for none
 * @param output_reference         forwarded to the phylip and FASTA writers
 * @param pure_mode                forwarded to detect_snps()
 * @param output_monomorphic       forwarded to detect_snps()
 * @return 1 on success, 0 when memory could not be allocated or an output
 *         file name does not fit in FILENAME_MAX bytes.
 */
static int generate_snp_sites_generic(char filename[], int output_multi_fasta_file, int output_vcf_file, int output_phylip_file, char output_filename[], int output_reference, int pure_mode, int output_monomorphic)
{
	int status = 0;
	int i;
	int number_of_snps;
	int number_of_samples;
	int has_output_filename;
	int append_extension;
	const char *output_filename_base;
	char filename_without_directory[FILENAME_MAX];
	char output_path[FILENAME_MAX];

	detect_snps(filename, pure_mode, output_monomorphic);
	number_of_snps = get_number_of_snps();
	number_of_samples = get_number_of_samples();

	/* NOTE(review): bases_for_snps is not declared in this function;
	 * presumably it is a file-scope char ** - confirm. */
	bases_for_snps = calloc(number_of_snps + 1, sizeof(char *));
	if (bases_for_snps == NULL) {
		fprintf(stderr, "Out of memory while allocating SNP bases\n");
		goto cleanup;
	}
	for (i = 0; i < number_of_snps; i++) {
		bases_for_snps[i] = calloc(number_of_samples + 1, sizeof(char));
		if (bases_for_snps[i] == NULL) {
			fprintf(stderr, "Out of memory while allocating SNP bases\n");
			goto cleanup;
		}
	}

	get_bases_for_each_snp(filename, bases_for_snps);

	/* An explicit, non-empty output name wins over the input file's name. */
	strip_directory_from_filename(filename, filename_without_directory);
	has_output_filename = (output_filename != NULL && *output_filename != '\0');
	output_filename_base = has_output_filename ? output_filename
	                                           : filename_without_directory;

	/* Extensions keep several outputs apart and mark derived names. */
	append_extension =
		(output_vcf_file + output_phylip_file + output_multi_fasta_file) > 1
		|| !has_output_filename;

	if (output_vcf_file) {
		if (!compose_output_filename(output_path, sizeof output_path,
		                             output_filename_base, ".vcf",
		                             append_extension)) {
			fprintf(stderr, "Output file name is too long\n");
			goto cleanup;
		}
		create_vcf_file(output_path, get_snp_locations(), number_of_snps,
		                bases_for_snps, get_sequence_names(),
		                number_of_samples, get_length_of_genome(),
		                get_pseudo_reference_sequence());
	}

	if (output_phylip_file) {
		if (!compose_output_filename(output_path, sizeof output_path,
		                             output_filename_base, ".phylip",
		                             append_extension)) {
			fprintf(stderr, "Output file name is too long\n");
			goto cleanup;
		}
		create_phylib_of_snp_sites(output_path, number_of_snps,
		                           bases_for_snps, get_sequence_names(),
		                           number_of_samples, output_reference,
		                           get_pseudo_reference_sequence(),
		                           get_snp_locations());
	}

	/* Multi-FASTA is the default when no format was requested. */
	if (output_multi_fasta_file
	    || (output_vcf_file == 0 && output_phylip_file == 0)) {
		if (!compose_output_filename(output_path, sizeof output_path,
		                             output_filename_base, ".snp_sites.aln",
		                             append_extension)) {
			fprintf(stderr, "Output file name is too long\n");
			goto cleanup;
		}
		create_fasta_of_snp_sites(output_path, number_of_snps,
		                          bases_for_snps, get_sequence_names(),
		                          number_of_samples, output_reference,
		                          get_pseudo_reference_sequence(),
		                          get_snp_locations());
	}

	status = 1;

cleanup:
	/* The sequence names returned by get_sequence_names() are not released
	 * here; the original code left that step commented out. */
	free(get_snp_locations());
	if (bases_for_snps != NULL) {
		for (i = 0; i < number_of_snps; i++) {
			free(bases_for_snps[i]);
		}
		free(bases_for_snps);
		bases_for_snps = NULL;
	}
	free(get_pseudo_reference_sequence());

	return status;
}
int generate_snp_sites(char filename[],int output_multi_fasta_file, int output_vcf_file, int output_phylip_file, char output_filename[]) { size_t length_of_genome; char * reference_sequence; int number_of_snps; int * snp_locations; int number_of_samples; int i; length_of_genome = genome_length(filename); reference_sequence = (char *) calloc((length_of_genome +1),sizeof(char)); build_reference_sequence(reference_sequence,filename); number_of_snps = detect_snps(reference_sequence, filename, length_of_genome); snp_locations = (int *) calloc((number_of_snps+1),sizeof(int)); build_snp_locations(snp_locations, reference_sequence); free(reference_sequence); number_of_samples = number_of_sequences_in_file(filename); // Find out the names of the sequences char* sequence_names[number_of_samples]; sequence_names[number_of_samples-1] = '\0'; for(i = 0; i < number_of_samples; i++) { sequence_names[i] = calloc(MAX_SAMPLE_NAME_SIZE,sizeof(char)); } get_sample_names_for_header(filename, sequence_names, number_of_samples); char* bases_for_snps[number_of_snps]; for(i = 0; i < number_of_snps; i++) { bases_for_snps[i] = calloc(number_of_samples+1 ,sizeof(char)); } get_bases_for_each_snp(filename, snp_locations, bases_for_snps, length_of_genome, number_of_snps); char output_filename_base[FILENAME_MAX]; char filename_without_directory[FILENAME_MAX]; strip_directory_from_filename(filename, filename_without_directory); strncpy(output_filename_base, filename_without_directory, FILENAME_MAX); if(output_filename != NULL && *output_filename != '\0') { strncpy(output_filename_base, output_filename, FILENAME_MAX); } if(output_vcf_file) { char vcf_output_filename[FILENAME_MAX]; strncpy(vcf_output_filename, output_filename_base, FILENAME_MAX); if((output_vcf_file + output_phylip_file + output_multi_fasta_file) > 1 || (output_filename == NULL || *output_filename == '\0') ) { strcat(vcf_output_filename, ".vcf"); } create_vcf_file(vcf_output_filename, snp_locations, number_of_snps, bases_for_snps, 
sequence_names, number_of_samples, length_of_genome); } if(output_phylip_file) { char phylip_output_filename[FILENAME_MAX]; strncpy(phylip_output_filename, output_filename_base, FILENAME_MAX); if((output_vcf_file + output_phylip_file + output_multi_fasta_file) > 1 || (output_filename == NULL || *output_filename == '\0') ) { strcat(phylip_output_filename, ".phylip"); } create_phylib_of_snp_sites(phylip_output_filename, number_of_snps, bases_for_snps, sequence_names, number_of_samples); } if((output_multi_fasta_file) || (output_vcf_file ==0 && output_phylip_file == 0 && output_multi_fasta_file == 0)) { char multi_fasta_output_filename[FILENAME_MAX]; strncpy(multi_fasta_output_filename, output_filename_base, FILENAME_MAX); if((output_vcf_file + output_phylip_file + output_multi_fasta_file) > 1 || (output_filename == NULL || *output_filename == '\0') ) { strcat(multi_fasta_output_filename, ".snp_sites.aln"); } create_fasta_of_snp_sites(multi_fasta_output_filename, number_of_snps, bases_for_snps, sequence_names, number_of_samples); } // free memory free(snp_locations); for(i = 0; i < number_of_samples; i++) { free(sequence_names[i]); } for(i = 0; i < number_of_snps; i++) { free(bases_for_snps[i]); } return 1; }