/*
 * Detects the SNP sites of the alignment in `filename` and writes them out in
 * every requested format.
 *
 *   filename                 alignment handed to detect_snps() and
 *                            get_bases_for_each_snp().
 *   output_multi_fasta_file  non-zero to write a multi-FASTA file; this file is
 *                            also written when all three format flags are zero.
 *   output_vcf_file          non-zero to write a VCF file.
 *   output_phylip_file       non-zero to write a PHYLIP file.
 *   output_filename          optional output name; when NULL or empty, the input
 *                            file name with its directory stripped is used.
 *   output_reference         forwarded to the PHYLIP and FASTA writers.
 *   pure_mode,
 *   output_monomorphic       forwarded to detect_snps().
 *
 * A format suffix (".vcf", ".phylip", ".snp_sites.aln") is appended when the
 * sum of the three format flags exceeds 1 or when no output name was given;
 * otherwise the output name is used verbatim.
 *
 * Returns 1 on success and 0 when a working buffer could not be allocated.
 */
static int generate_snp_sites_generic(char filename[], int output_multi_fasta_file, int output_vcf_file, int output_phylip_file, char output_filename[], int output_reference, int pure_mode, int output_monomorphic)
{
	char output_filename_base[FILENAME_MAX];
	char filename_without_directory[FILENAME_MAX];
	int has_output_name;
	int append_suffix;
	int status = 0;
	int i;

	detect_snps(filename, pure_mode, output_monomorphic);

	/* NOTE(review): bases_for_snps is not declared in this function; it is
	 * presumably a file-scope char ** -- confirm. */
	bases_for_snps = calloc(get_number_of_snps()+1, sizeof(char*));
	if (bases_for_snps == NULL) {
		goto cleanup;
	}
	for (i = 0; i < get_number_of_snps(); i++) {
		bases_for_snps[i] = calloc(get_number_of_samples()+1, sizeof(char));
		if (bases_for_snps[i] == NULL) {
			/* The table is zero-filled, so the cleanup loop can free it safely. */
			goto cleanup;
		}
	}

	get_bases_for_each_snp(filename, bases_for_snps);

	has_output_name = (output_filename != NULL && *output_filename != '\0');
	append_suffix = (output_vcf_file + output_phylip_file + output_multi_fasta_file) > 1 || !has_output_name;

	strip_directory_from_filename(filename, filename_without_directory);

	/* strncpy() does not terminate the destination when the source fills it,
	 * so copy one byte less and terminate explicitly. */
	strncpy(output_filename_base, has_output_name ? output_filename : filename_without_directory, FILENAME_MAX - 1);
	output_filename_base[FILENAME_MAX - 1] = '\0';

	if (output_vcf_file) {
		char vcf_output_filename[FILENAME_MAX];

		strncpy(vcf_output_filename, output_filename_base, FILENAME_MAX - 1);
		vcf_output_filename[FILENAME_MAX - 1] = '\0';
		if (append_suffix) {
			/* Bounded append: a too-long name is truncated instead of overflowing. */
			strncat(vcf_output_filename, ".vcf", FILENAME_MAX - strlen(vcf_output_filename) - 1);
		}
		create_vcf_file(vcf_output_filename, get_snp_locations(), get_number_of_snps(), bases_for_snps, get_sequence_names(), get_number_of_samples(), get_length_of_genome(), get_pseudo_reference_sequence());
	}

	if (output_phylip_file) {
		char phylip_output_filename[FILENAME_MAX];

		strncpy(phylip_output_filename, output_filename_base, FILENAME_MAX - 1);
		phylip_output_filename[FILENAME_MAX - 1] = '\0';
		if (append_suffix) {
			strncat(phylip_output_filename, ".phylip", FILENAME_MAX - strlen(phylip_output_filename) - 1);
		}
		create_phylib_of_snp_sites(phylip_output_filename, get_number_of_snps(), bases_for_snps, get_sequence_names(), get_number_of_samples(), output_reference, get_pseudo_reference_sequence(), get_snp_locations());
	}

	/* Multi-FASTA is the default format when no format was requested at all. */
	if ((output_multi_fasta_file) || (output_vcf_file == 0 && output_phylip_file == 0 && output_multi_fasta_file == 0)) {
		char multi_fasta_output_filename[FILENAME_MAX];

		strncpy(multi_fasta_output_filename, output_filename_base, FILENAME_MAX - 1);
		multi_fasta_output_filename[FILENAME_MAX - 1] = '\0';
		if (append_suffix) {
			strncat(multi_fasta_output_filename, ".snp_sites.aln", FILENAME_MAX - strlen(multi_fasta_output_filename) - 1);
		}
		create_fasta_of_snp_sites(multi_fasta_output_filename, get_number_of_snps(), bases_for_snps, get_sequence_names(), get_number_of_samples(), output_reference, get_pseudo_reference_sequence(), get_snp_locations());
	}

	status = 1;

cleanup:
	/* Free memory. The sequence names returned by get_sequence_names() are
	 * deliberately left alone, as in the original code. */
	free(get_snp_locations());
	if (bases_for_snps != NULL) {
		for (i = 0; i < get_number_of_snps(); i++) {
			free(bases_for_snps[i]);
		}
		/* The table itself was previously leaked; clear the pointer so no
		 * stale reference to the freed rows remains. */
		free(bases_for_snps);
		bases_for_snps = NULL;
	}
	free(get_pseudo_reference_sequence());

	return status;
}
int generate_snp_sites(char filename[],int output_multi_fasta_file, int output_vcf_file, int output_phylip_file, char output_filename[]) { size_t length_of_genome; char * reference_sequence; int number_of_snps; int * snp_locations; int number_of_samples; int i; length_of_genome = genome_length(filename); reference_sequence = (char *) calloc((length_of_genome +1),sizeof(char)); build_reference_sequence(reference_sequence,filename); number_of_snps = detect_snps(reference_sequence, filename, length_of_genome); snp_locations = (int *) calloc((number_of_snps+1),sizeof(int)); build_snp_locations(snp_locations, reference_sequence); free(reference_sequence); number_of_samples = number_of_sequences_in_file(filename); // Find out the names of the sequences char* sequence_names[number_of_samples]; sequence_names[number_of_samples-1] = '\0'; for(i = 0; i < number_of_samples; i++) { sequence_names[i] = calloc(MAX_SAMPLE_NAME_SIZE,sizeof(char)); } get_sample_names_for_header(filename, sequence_names, number_of_samples); char* bases_for_snps[number_of_snps]; for(i = 0; i < number_of_snps; i++) { bases_for_snps[i] = calloc(number_of_samples+1 ,sizeof(char)); } get_bases_for_each_snp(filename, snp_locations, bases_for_snps, length_of_genome, number_of_snps); char output_filename_base[FILENAME_MAX]; char filename_without_directory[FILENAME_MAX]; strip_directory_from_filename(filename, filename_without_directory); strncpy(output_filename_base, filename_without_directory, FILENAME_MAX); if(output_filename != NULL && *output_filename != '\0') { strncpy(output_filename_base, output_filename, FILENAME_MAX); } if(output_vcf_file) { char vcf_output_filename[FILENAME_MAX]; strncpy(vcf_output_filename, output_filename_base, FILENAME_MAX); if((output_vcf_file + output_phylip_file + output_multi_fasta_file) > 1 || (output_filename == NULL || *output_filename == '\0') ) { strcat(vcf_output_filename, ".vcf"); } create_vcf_file(vcf_output_filename, snp_locations, number_of_snps, bases_for_snps, 
sequence_names, number_of_samples, length_of_genome); } if(output_phylip_file) { char phylip_output_filename[FILENAME_MAX]; strncpy(phylip_output_filename, output_filename_base, FILENAME_MAX); if((output_vcf_file + output_phylip_file + output_multi_fasta_file) > 1 || (output_filename == NULL || *output_filename == '\0') ) { strcat(phylip_output_filename, ".phylip"); } create_phylib_of_snp_sites(phylip_output_filename, number_of_snps, bases_for_snps, sequence_names, number_of_samples); } if((output_multi_fasta_file) || (output_vcf_file ==0 && output_phylip_file == 0 && output_multi_fasta_file == 0)) { char multi_fasta_output_filename[FILENAME_MAX]; strncpy(multi_fasta_output_filename, output_filename_base, FILENAME_MAX); if((output_vcf_file + output_phylip_file + output_multi_fasta_file) > 1 || (output_filename == NULL || *output_filename == '\0') ) { strcat(multi_fasta_output_filename, ".snp_sites.aln"); } create_fasta_of_snp_sites(multi_fasta_output_filename, number_of_snps, bases_for_snps, sequence_names, number_of_samples); } // free memory free(snp_locations); for(i = 0; i < number_of_samples; i++) { free(sequence_names[i]); } for(i = 0; i < number_of_snps; i++) { free(bases_for_snps[i]); } return 1; }
int main_searchvariants(int argc, char* argv[],char *server_url) { int cmd; int i; search_variant_request *request=(search_variant_request*)malloc(1*sizeof(search_variant_request)); int size_variants = 0; int size_calls = 0; request->name = "null"; request->pageToken ="null"; char debug = 0; static struct option long_options[]={ {"variantSetIds",required_argument,0,'v'}, {"referenceName",required_argument,0,'r'}, {"start",required_argument,0,'s'}, {"end",required_argument,0,'e'}, {"callSetIds",required_argument,0,'c'}, {"variantName",required_argument,0,'n'}, {"debug",no_argument,0,'d'}, {0,0,0,0} }; //if(argc < 2) while((cmd=getopt_long(argc,argv,"v:r:s:e:c:n:d",long_options,NULL))!=-1) { switch(cmd) { case 'v': if(optarg==NULL||(strcmp(optarg,"")==0)) { error("--variantSetIds string can't be empty.\n"); } else { size_variants = count_ids(optarg); request->variantSetIds = (char**)malloc(size_variants*sizeof(char*)); set_ids(optarg,request->variantSetIds,size_variants); } break; case 'r': if(optarg==NULL||(strcmp(optarg,"")==0)) { error("--referenceName string can't be empty.\n"); } else { request->referenceName = optarg; } break; case 's': request->start = atol(optarg); if(request->start < 0) { error("--start integer must be no negative."); } break; case 'e': request->end = atol(optarg); if(request->end < 0) { perror("--end integer must be no negative."); } break; case 'c': if(optarg==NULL||(strcmp(optarg,"")==0)) { size_calls = 0; } else { size_calls = count_ids(optarg); request->callSetIds = (char**)malloc(size_calls*sizeof(char*)); set_ids(optarg,request->callSetIds,size_calls); } break; case 'n': request->name = optarg; break; case 'd': debug = 1; break; case '?': usage(); default: error("Unknown argument %s\n",optarg); } } if(size_variants==0) { usage(); } start_user(server_url); char* vcf_file_name; //process each variantSet for(i=0; i<size_variants; i++) { vcf_file_name = get_variantSetId_vcf_name(request,i); create_vcf_file(vcf_file_name); 
while(strcmp(request->pageToken,"NULL")!=0) { user->post_fields = create_request_string(request,i,size_calls); //printf("post field string: %s \n",user->post_fields); client_search_request(user,"variants"); //printf("%s\n",user->response); write_vcf_file(user->response,vcf_file_name); request->pageToken = get_pageToken(); //printf("%s \n",request->pageToken); if(debug) { printf("%s\n",user->response); } } } end_user(); return 0; }
/*
 * Rebuilds the SNP outputs and the tree from a VCF file and a newick tree.
 *
 * Reads the column names and the "POS" column of `vcf_filename`, builds the
 * newick tree from `tree_filename`, re-filters the SNPs against the first
 * sample's sequence, writes PHYLIP, VCF and FASTA outputs (all given
 * `tree_filename` as their name argument), rescales the branch distances and
 * finally overwrites `tree_filename` with the updated tree followed by ';'.
 *
 *   vcf_filename                    VCF file to read.
 *   tree_filename                   tree to read; also the output name.
 *   multi_fasta_filename            not used by this function.
 *   min_snps                        forwarded to build_newick_tree().
 *   original_multi_fasta_filename   alignment whose genome length is taken.
 *
 * If the VCF file cannot be opened the function reports it on stderr and
 * returns without doing anything. If the tree file cannot be opened for
 * writing, that is reported and the tree is not written.
 */
void extract_sequences(char vcf_filename[], char tree_filename[],char multi_fasta_filename[],int min_snps, char original_multi_fasta_filename[])
{
	FILE *vcf_file_pointer;
	FILE *output_tree_pointer;
	newick_node *root_node;
	char *reference_sequence_bases;
	int number_of_snps;
	int number_of_columns;
	int number_of_filtered_snps;
	int number_of_samples;
	int length_of_original_genome;
	int i;

	vcf_file_pointer = fopen(vcf_filename, "r");
	if (vcf_file_pointer == NULL) {
		/* Nothing has been allocated yet, so it is safe to leave right away. */
		fprintf(stderr, "Cannot open VCF file '%s' for reading\n", vcf_filename);
		return;
	}

	length_of_original_genome = genome_length(original_multi_fasta_filename);
	number_of_columns = get_number_of_columns_from_file(vcf_file_pointer);

	char *column_names[number_of_columns];
	for (i = 0; i < number_of_columns; i++) {
		column_names[i] = calloc(MAX_SAMPLE_NAME_SIZE, sizeof(char));
	}
	get_column_names(vcf_file_pointer, column_names, number_of_columns);

	number_of_snps = number_of_snps_in_phylip();

	int snp_locations[number_of_snps];
	get_integers_from_column_in_vcf(vcf_file_pointer, snp_locations, number_of_snps, column_number_for_column_name(column_names, "POS", number_of_columns));

	root_node = build_newick_tree(tree_filename, vcf_file_pointer, snp_locations, number_of_snps, column_names, number_of_columns, length_of_original_genome, min_snps);
	fclose(vcf_file_pointer);

	int filtered_snp_locations[number_of_snps];

	number_of_samples = number_of_samples_from_parse_phylip();
	char *sample_names[number_of_samples];
	get_sample_names_from_parse_phylip(sample_names);

	/* The first sample's sequence serves as the reference for re-filtering. */
	reference_sequence_bases = (char *) calloc((number_of_snps + 1), sizeof(char));
	get_sequence_for_sample_name(reference_sequence_bases, sample_names[0]);

	int internal_nodes[number_of_samples];
	for (i = 0; i < number_of_samples; i++) {
		internal_nodes[i] = get_internal_node(i);
	}

	number_of_filtered_snps = refilter_existing_snps(reference_sequence_bases, number_of_snps, snp_locations, filtered_snp_locations, internal_nodes);

	char *filtered_bases_for_snps[number_of_filtered_snps];
	filter_sequence_bases_and_rotate(reference_sequence_bases, filtered_bases_for_snps, number_of_filtered_snps);

	create_phylip_of_snp_sites(tree_filename, number_of_filtered_snps, filtered_bases_for_snps, sample_names, number_of_samples, internal_nodes);
	create_vcf_file(tree_filename, filtered_snp_locations, number_of_filtered_snps, filtered_bases_for_snps, sample_names, number_of_samples, internal_nodes, 0);
	create_fasta_of_snp_sites(tree_filename, number_of_filtered_snps, filtered_bases_for_snps, sample_names, number_of_samples, internal_nodes);

	/* Create a new tree with updated distances. */
	scale_branch_distances(root_node, number_of_filtered_snps);

	output_tree_pointer = fopen(tree_filename, "w");
	if (output_tree_pointer == NULL) {
		fprintf(stderr, "Cannot open tree file '%s' for writing\n", tree_filename);
	} else {
		print_tree(root_node, output_tree_pointer);
		fprintf(output_tree_pointer, ";");
		fflush(output_tree_pointer);
		fclose(output_tree_pointer);
	}

	/* NOTE(review): the original author reports a seg fault somewhere in this
	 * cleanup. Whether sample_names[] and filtered_bases_for_snps[] are owned
	 * by this function (and not released again by seqFreeAll()) cannot be
	 * told from here -- confirm against the helpers that fill them. */
	for (i = 0; i < number_of_columns; i++) {
		free(column_names[i]);
	}
	for (i = 0; i < number_of_samples; i++) {
		free(sample_names[i]);
	}
	for (i = 0; i < number_of_filtered_snps; i++) {
		free(filtered_bases_for_snps[i]);
	}
	cleanup_node_memory(root_node);
	seqFreeAll();
	free(reference_sequence_bases);
}