示例#1
0
char * read_line(char sequence[], FILE * pFilePtr)
{
    char *pcRes         = NULL;  
    long   lineLength    = 0; 
	  char current_line_buffer[MAX_READ_BUFFER] = {0};
	
	
    while((pcRes = fgets(current_line_buffer, sizeof(current_line_buffer), pFilePtr))  != NULL){
	      if(size_of_string(sequence) > 0)
	      {
	    			sequence = realloc(sequence, sizeof(char)*(size_of_string(sequence) + size_of_string(current_line_buffer) + 2) );
        }
				concat_strings_created_with_malloc(sequence,current_line_buffer);
				current_line_buffer[0] = '\0';
        lineLength = size_of_string(sequence);
        //if end of line character is found then exit from loop
		
        if((sequence)[lineLength] == '\n' || (sequence)[lineLength] == '\0'){
            break;
        }
    }
	 
	 
    return sequence;
}
void create_phylip_of_snp_sites(char filename[], int number_of_snps, char ** bases_for_snps, char ** sequence_names, int number_of_samples, int internal_nodes[])
{
	FILE *fasta_file_pointer;
	int sample_counter;
	int snp_counter;
	char * base_filename;
	
	base_filename = (char *) calloc(1024,sizeof(char));
	memcpy(base_filename, filename, 1024*sizeof(char));
	char extension[8] = {".phylip"};
	concat_strings_created_with_malloc(base_filename,extension);
	fasta_file_pointer = fopen(base_filename, "w");
	
	int number_of_leaves = number_of_samples;
	for(sample_counter=0; sample_counter< number_of_samples; sample_counter++)
	{
		if(internal_nodes[sample_counter] == 1)
		{
			number_of_leaves--;
		}
	}
	
	fprintf( fasta_file_pointer, "%d %d\n", number_of_leaves, number_of_snps);
	
	for(sample_counter=0; sample_counter< number_of_samples; sample_counter++)
	{
		// sequence_name can be more than 10 (relaxed phylip format) and contain [\w\s]
		//TODO check for illegal characters [^\w\s]
		if(internal_nodes[sample_counter] == 1)
		{
			continue;
		}
		fprintf( fasta_file_pointer, "%s\t", sequence_names[sample_counter]);
		
		for(snp_counter=0; snp_counter< number_of_snps; snp_counter++)
		{
			fprintf( fasta_file_pointer, "%c", bases_for_snps[snp_counter][sample_counter]);
		}
		fprintf( fasta_file_pointer, "\n");
	}
  fclose(fasta_file_pointer);
	free(base_filename);
}
示例#3
0
int generate_snp_sites(char filename[],int output_multi_fasta_file, int output_vcf_file, int output_phylip_file, char output_filename[])
{
	size_t length_of_genome;
	char * reference_sequence;
	int number_of_snps;
	int * snp_locations;
	int number_of_samples;
	int i;
	
	length_of_genome = genome_length(filename);
	reference_sequence = (char *) calloc((length_of_genome +1),sizeof(char));
	
	build_reference_sequence(reference_sequence,filename);
	number_of_snps = detect_snps(reference_sequence, filename, length_of_genome);
	
	snp_locations = (int *) calloc((number_of_snps+1),sizeof(int));
	build_snp_locations(snp_locations, reference_sequence);
	free(reference_sequence);
	
	number_of_samples = number_of_sequences_in_file(filename);
	
	// Find out the names of the sequences
	char* sequence_names[number_of_samples];
	sequence_names[number_of_samples-1] = '\0';
	for(i = 0; i < number_of_samples; i++)
	{
		sequence_names[i] = calloc(MAX_SAMPLE_NAME_SIZE,sizeof(char));
	}
	
	get_sample_names_for_header(filename, sequence_names, number_of_samples);
	
	char* bases_for_snps[number_of_snps];
	
	for(i = 0; i < number_of_snps; i++)
	{
		bases_for_snps[i] = calloc(number_of_samples+1 ,sizeof(char));
	}
	
	get_bases_for_each_snp(filename, snp_locations, bases_for_snps, length_of_genome, number_of_snps);
	
	char output_filename_base[MAX_FILENAME_SIZE];
	char filename_without_directory[MAX_FILENAME_SIZE];
	strip_directory_from_filename(filename, filename_without_directory);
	memcpy(output_filename_base,filename_without_directory, size_of_string(filename_without_directory)+1 );
	
	if(output_filename != NULL && *output_filename != '\0')
	{
		memcpy(output_filename_base,output_filename, size_of_string(output_filename)+1 );
	}

	if(output_vcf_file)
	{
		char * vcf_output_filename;
		vcf_output_filename = calloc(MAX_FILENAME_SIZE,sizeof(char));
		memcpy(vcf_output_filename, output_filename_base, (MAX_FILENAME_SIZE)*sizeof(char));
		if((output_vcf_file + output_phylip_file + output_multi_fasta_file) > 1 || (output_filename == NULL || *output_filename == '\0') )
		{
			char extension[5] = {".vcf"};
			concat_strings_created_with_malloc(vcf_output_filename,extension);
		}
		
	  create_vcf_file(vcf_output_filename, snp_locations, number_of_snps, bases_for_snps, sequence_names, number_of_samples, length_of_genome);
		free(vcf_output_filename);
  }

  if(output_phylip_file)
  {
		char *phylip_output_filename;
		phylip_output_filename = calloc(MAX_FILENAME_SIZE,sizeof(char));
		memcpy(phylip_output_filename, output_filename_base, (MAX_FILENAME_SIZE)*sizeof(char));
		if((output_vcf_file + output_phylip_file + output_multi_fasta_file) > 1 || (output_filename == NULL || *output_filename == '\0') )
		{
			char extension[10] = {".phylip"};
			concat_strings_created_with_malloc(phylip_output_filename,extension);
		}
	  create_phylib_of_snp_sites(phylip_output_filename, number_of_snps, bases_for_snps, sequence_names, number_of_samples);
		free(phylip_output_filename);
  }

  if((output_multi_fasta_file) || (output_vcf_file ==0 && output_phylip_file == 0 && output_multi_fasta_file == 0))
  {
		char *multi_fasta_output_filename;
		multi_fasta_output_filename = calloc(MAX_FILENAME_SIZE,sizeof(char));
		memcpy(multi_fasta_output_filename, output_filename_base, (MAX_FILENAME_SIZE)*sizeof(char));
		if((output_vcf_file + output_phylip_file + output_multi_fasta_file) > 1 || (output_filename == NULL || *output_filename == '\0') )
		{
			char extension[20] = {".snp_sites.aln"};
			concat_strings_created_with_malloc(multi_fasta_output_filename,extension);
		}
	  create_fasta_of_snp_sites(multi_fasta_output_filename, number_of_snps, bases_for_snps, sequence_names, number_of_samples);
	  free(multi_fasta_output_filename);
  }

  // free memory
	free(snp_locations);
	for(i = 0; i < number_of_samples; i++)
	{
		free(sequence_names[i]);
	}
	for(i = 0; i < number_of_snps; i++)
	{
		free(bases_for_snps[i]);
	}
	

	return 1;
}