示例#1
0
END_TEST

START_TEST (number_of_snps_detected_small)
{
  /* Run SNP detection on the small alignment fixture and require that
     get_number_of_snps() reports exactly one SNP afterwards. */
  char *alignment_path = "../tests/data/small_alignment.aln";

  detect_snps(alignment_path);
  fail_unless(get_number_of_snps() == 1);
}
示例#2
0
END_TEST 

START_TEST (number_of_snps_detected)
{
  /* Run SNP detection on the multi-line-per-sequence fixture and require
     that get_number_of_snps() reports five SNPs afterwards. */
  char *alignment_path = "../tests/data/alignment_file_multiple_lines_per_sequence.aln";

  detect_snps(alignment_path);
  fail_unless(get_number_of_snps() == 5);
}
示例#3
0
END_TEST

START_TEST (valid_number_of_sequences_in_file_with_multiple_lines_per_sequence)
{
  /* After SNP detection on the multi-line-per-sequence fixture,
     get_number_of_samples() must report 109 samples. */
  char *alignment_path = "../tests/data/alignment_file_multiple_lines_per_sequence.aln";

  detect_snps(alignment_path);
  fail_unless(get_number_of_samples() == 109);
}
示例#4
0
END_TEST

START_TEST (valid_genome_length_with_multiple_lines_per_sequence)
{
  /* After SNP detection on the multi-line-per-sequence fixture,
     get_length_of_genome() must report a length of 2000. */
  char *alignment_path = "../tests/data/alignment_file_multiple_lines_per_sequence.aln";

  detect_snps(alignment_path);
  fail_unless(get_length_of_genome() == 2000);
}
示例#5
0
END_TEST

START_TEST (sample_names_from_alignment_file)
{
  /* Run SNP detection on the small alignment fixture, then compare the first
     three entries returned by get_sequence_names() with the expected names,
     in order. */
  char *alignment_path = "../tests/data/small_alignment.aln";
  char **current_sequence_names;

  detect_snps(alignment_path);
  current_sequence_names = get_sequence_names();

  fail_unless(strcmp(current_sequence_names[0], "reference_sequence") == 0);
  fail_unless(strcmp(current_sequence_names[1], "comparison_sequence") == 0);
  fail_unless(strcmp(current_sequence_names[2], "another_comparison_sequence") == 0);
}
示例#6
0
/*
 * Detects the SNPs in an alignment file and writes them out in the requested
 * formats.
 *
 * filename                 alignment file passed to detect_snps() and
 *                          get_bases_for_each_snp()
 * output_multi_fasta_file  non-zero to write the multi-FASTA output
 * output_vcf_file          non-zero to write the VCF output
 * output_phylip_file       non-zero to write the PHYLIP output
 * output_filename          output name; when NULL or empty, the input file
 *                          name with its directory stripped is used instead
 * output_reference         forwarded to the PHYLIP and multi-FASTA writers
 * pure_mode                forwarded to detect_snps()
 * output_monomorphic       forwarded to detect_snps()
 *
 * If none of the three format flags is set, the multi-FASTA output is written.
 * A format suffix (".vcf", ".phylip", ".snp_sites.aln") is appended to the
 * output name when the format flags sum to more than one, or when no output
 * name was supplied. Names that would not fit in FILENAME_MAX bytes are
 * truncated rather than written past the end of the buffer.
 *
 * Before returning, the buffers returned by get_snp_locations() and
 * get_pseudo_reference_sequence() are freed, and bases_for_snps (rows and the
 * row table itself) is freed and reset to NULL.
 *
 * Returns 1 on success, 0 if the SNP base table could not be allocated.
 */
static int generate_snp_sites_generic(char filename[],
                                      int output_multi_fasta_file,
                                      int output_vcf_file,
                                      int output_phylip_file,
                                      char output_filename[],
                                      int output_reference, int pure_mode, int output_monomorphic)
{
	int i;
	int status = 0;
	int number_of_snps;
	int number_of_samples;
	int has_output_filename;
	int append_suffix;
	char output_filename_base[FILENAME_MAX];
	char filename_without_directory[FILENAME_MAX];

	detect_snps(filename, pure_mode, output_monomorphic);

	// Read the counts once so the cleanup loop releases exactly the rows
	// that were allocated here.
	number_of_snps = get_number_of_snps();
	number_of_samples = get_number_of_samples();

	bases_for_snps = calloc(number_of_snps + 1, sizeof(char*));
	if(bases_for_snps == NULL)
	{
		goto cleanup;
	}

	for(i = 0; i < number_of_snps; i++)
	{
		bases_for_snps[i] = calloc(number_of_samples + 1, sizeof(char));
		if(bases_for_snps[i] == NULL)
		{
			goto cleanup;
		}
	}

	get_bases_for_each_snp(filename, bases_for_snps);

	has_output_filename = (output_filename != NULL && *output_filename != '\0');

	// The suffix is needed to keep several outputs apart, or when the name
	// was derived from the input file instead of given by the caller.
	append_suffix = (output_vcf_file + output_phylip_file + output_multi_fasta_file) > 1
	                || !has_output_filename;

	strip_directory_from_filename(filename, filename_without_directory);

	// snprintf always NUL-terminates and never writes past the buffer,
	// unlike the strncpy/strcat pair it replaces.
	snprintf(output_filename_base, sizeof output_filename_base, "%s",
	         has_output_filename ? output_filename : filename_without_directory);

	if(output_vcf_file)
	{
		char vcf_output_filename[FILENAME_MAX];
		snprintf(vcf_output_filename, sizeof vcf_output_filename, "%s%s",
		         output_filename_base, append_suffix ? ".vcf" : "");

		create_vcf_file(vcf_output_filename, get_snp_locations(), number_of_snps, bases_for_snps, get_sequence_names(), number_of_samples, get_length_of_genome(), get_pseudo_reference_sequence());
	}

	if(output_phylip_file)
	{
		char phylip_output_filename[FILENAME_MAX];
		snprintf(phylip_output_filename, sizeof phylip_output_filename, "%s%s",
		         output_filename_base, append_suffix ? ".phylip" : "");

		create_phylib_of_snp_sites(phylip_output_filename, number_of_snps, bases_for_snps, get_sequence_names(), number_of_samples, output_reference, get_pseudo_reference_sequence(), get_snp_locations());
	}

	// Multi-FASTA is also the fallback when no format was requested at all.
	if((output_multi_fasta_file) || (output_vcf_file == 0 && output_phylip_file == 0 && output_multi_fasta_file == 0))
	{
		char multi_fasta_output_filename[FILENAME_MAX];
		snprintf(multi_fasta_output_filename, sizeof multi_fasta_output_filename, "%s%s",
		         output_filename_base, append_suffix ? ".snp_sites.aln" : "");

		create_fasta_of_snp_sites(multi_fasta_output_filename, number_of_snps, bases_for_snps, get_sequence_names(), number_of_samples, output_reference, get_pseudo_reference_sequence(), get_snp_locations());
	}

	status = 1;

cleanup:
	// free memory
	free(get_snp_locations());

	// NOTE(review): the sequence names are not freed here; the previous code
	// had that free commented out. Confirm who owns them.

	if(bases_for_snps != NULL)
	{
		// Rows that were never allocated are still NULL from calloc, and
		// free(NULL) is a no-op.
		for(i = 0; i < number_of_snps; i++)
		{
			free(bases_for_snps[i]);
		}
		free(bases_for_snps);
		bases_for_snps = NULL;
	}
	free(get_pseudo_reference_sequence());

	return status;
}
示例#7
0
/*
 * Finds the SNP sites in an alignment file and writes them out in the
 * requested formats.
 *
 * filename                 alignment file to read
 * output_multi_fasta_file  non-zero to write the multi-FASTA output
 * output_vcf_file          non-zero to write the VCF output
 * output_phylip_file       non-zero to write the PHYLIP output
 * output_filename          output name; when NULL or empty, the input file
 *                          name with its directory stripped is used instead
 *
 * If none of the three format flags is set, the multi-FASTA output is written.
 * A format suffix (".vcf", ".phylip", ".snp_sites.aln") is appended to the
 * output name when the format flags sum to more than one, or when no output
 * name was supplied. Names that would not fit in FILENAME_MAX bytes are
 * truncated rather than written past the end of the buffer.
 *
 * All working buffers are heap-allocated and released before returning, so
 * inputs with zero samples or zero SNPs no longer create zero-length
 * variable-length arrays.
 *
 * Returns 1 on success, 0 if a working buffer could not be allocated.
 */
int generate_snp_sites(char filename[],int output_multi_fasta_file, int output_vcf_file, int output_phylip_file, char output_filename[])
{
	size_t length_of_genome;
	char * reference_sequence;
	int number_of_snps = 0;
	int * snp_locations = NULL;
	int number_of_samples = 0;
	char ** sequence_names = NULL;
	char ** bases_for_snps = NULL;
	int has_output_filename;
	int append_suffix;
	int status = 0;
	int i;
	char output_filename_base[FILENAME_MAX];
	char filename_without_directory[FILENAME_MAX];

	length_of_genome = genome_length(filename);
	reference_sequence = calloc(length_of_genome + 1, sizeof(char));
	if(reference_sequence == NULL)
	{
		return 0;
	}

	build_reference_sequence(reference_sequence, filename);
	number_of_snps = detect_snps(reference_sequence, filename, length_of_genome);

	snp_locations = calloc(number_of_snps + 1, sizeof(int));
	if(snp_locations == NULL)
	{
		free(reference_sequence);
		return 0;
	}
	build_snp_locations(snp_locations, reference_sequence);
	free(reference_sequence);

	number_of_samples = number_of_sequences_in_file(filename);

	// Find out the names of the sequences.
	// One spare slot keeps the allocation non-empty when there are no samples.
	sequence_names = calloc(number_of_samples + 1, sizeof(char*));
	if(sequence_names == NULL)
	{
		goto cleanup;
	}
	for(i = 0; i < number_of_samples; i++)
	{
		sequence_names[i] = calloc(MAX_SAMPLE_NAME_SIZE, sizeof(char));
		if(sequence_names[i] == NULL)
		{
			goto cleanup;
		}
	}

	get_sample_names_for_header(filename, sequence_names, number_of_samples);

	bases_for_snps = calloc(number_of_snps + 1, sizeof(char*));
	if(bases_for_snps == NULL)
	{
		goto cleanup;
	}
	for(i = 0; i < number_of_snps; i++)
	{
		bases_for_snps[i] = calloc(number_of_samples + 1, sizeof(char));
		if(bases_for_snps[i] == NULL)
		{
			goto cleanup;
		}
	}

	get_bases_for_each_snp(filename, snp_locations, bases_for_snps, length_of_genome, number_of_snps);

	has_output_filename = (output_filename != NULL && *output_filename != '\0');

	// The suffix is needed to keep several outputs apart, or when the name
	// was derived from the input file instead of given by the caller.
	append_suffix = (output_vcf_file + output_phylip_file + output_multi_fasta_file) > 1
	                || !has_output_filename;

	strip_directory_from_filename(filename, filename_without_directory);

	// snprintf always NUL-terminates and never writes past the buffer,
	// unlike the strncpy/strcat pair it replaces.
	snprintf(output_filename_base, sizeof output_filename_base, "%s",
	         has_output_filename ? output_filename : filename_without_directory);

	if(output_vcf_file)
	{
		char vcf_output_filename[FILENAME_MAX];
		snprintf(vcf_output_filename, sizeof vcf_output_filename, "%s%s",
		         output_filename_base, append_suffix ? ".vcf" : "");

		create_vcf_file(vcf_output_filename, snp_locations, number_of_snps, bases_for_snps, sequence_names, number_of_samples, length_of_genome);
	}

	if(output_phylip_file)
	{
		char phylip_output_filename[FILENAME_MAX];
		snprintf(phylip_output_filename, sizeof phylip_output_filename, "%s%s",
		         output_filename_base, append_suffix ? ".phylip" : "");

		create_phylib_of_snp_sites(phylip_output_filename, number_of_snps, bases_for_snps, sequence_names, number_of_samples);
	}

	// Multi-FASTA is also the fallback when no format was requested at all.
	if((output_multi_fasta_file) || (output_vcf_file == 0 && output_phylip_file == 0 && output_multi_fasta_file == 0))
	{
		char multi_fasta_output_filename[FILENAME_MAX];
		snprintf(multi_fasta_output_filename, sizeof multi_fasta_output_filename, "%s%s",
		         output_filename_base, append_suffix ? ".snp_sites.aln" : "");

		create_fasta_of_snp_sites(multi_fasta_output_filename, number_of_snps, bases_for_snps, sequence_names, number_of_samples);
	}

	status = 1;

cleanup:
	// free memory
	// Rows that were never allocated are still NULL from calloc, and
	// free(NULL) is a no-op, so partial allocations are released safely.
	free(snp_locations);
	if(sequence_names != NULL)
	{
		for(i = 0; i < number_of_samples; i++)
		{
			free(sequence_names[i]);
		}
		free(sequence_names);
	}
	if(bases_for_snps != NULL)
	{
		for(i = 0; i < number_of_snps; i++)
		{
			free(bases_for_snps[i]);
		}
		free(bases_for_snps);
	}

	return status;
}