int main(int argc, char ** argv) { FILE * input; struct seq *s1=0, *s2=0; char ori; char c; int fileindex; int del_input=0; while((c = getopt(argc, argv, "a:o:k:m:q:xd:vh")) != (char) -1) { switch (c) { case 'a': align_type = optarg; break; case 'o': output_format = optarg; break; case 'm': min_align = atoi(optarg); break; case 'q': min_qual = atof(optarg); break; case 'x': del_input = 1; break; case 'd': debug_flags_set(optarg); break; case 'v': cctools_version_print(stdout, argv[0]); exit(0); break; default: case 'h': show_help(argv[0]); exit(0); break; } } cctools_version_debug(D_DEBUG, argv[0]); fileindex = optind; if ((argc - optind) == 1) { input = fopen(argv[fileindex], "r"); if (!input) { fprintf(stderr, "sand_align_kernel: couldn't open %s: %s\n",argv[fileindex],strerror(errno)); exit(1); } } else { input = stdin; } struct cseq *c1, *c2; if(!strcmp(output_format,"ovl") || !strcmp(output_format, "ovl_new")) { overlap_write_begin(stdout); } // outer loop: read first sequence in comparison list while((c1=cseq_read(input))) { s1 = cseq_uncompress(c1); cseq_free(c1); // inner loop: read sequences until null (indicating end of list) // then continue again with outer loop. (two nulls to halt.) while((c2=cseq_read(input))) { s2 = cseq_uncompress(c2); cseq_free(c2); int dir = 0; int start1 = 0; int start2 = 0; char* tmp = strdup(s2->metadata); int metadata_valid = 0; char* token = strtok(tmp, " "); start2 = atoi(token); metadata_valid++; while((token = strtok(NULL, " "))) { dir = start1; start1 = start2; start2 = atoi(token); metadata_valid++; } if(metadata_valid>=1 && dir==-1) { seq_reverse_complement(s2); ori = 'I'; } else { ori = 'N'; } struct matrix *m = matrix_create(s1->num_bases,s2->num_bases); if(!m) { fprintf(stderr,"sand_align_kernel: out of memory when creating alignment matrix.\n"); exit(1); } struct alignment *aln; if(!strcmp(align_type,"sw")) { aln = align_smith_waterman(m,s1->data,s2->data); } else if(!strcmp(align_type,"ps")) { aln = align_prefix_suffix(m,s1->data,s2->data, min_align); } else if(!strcmp(align_type,"banded")) { if(metadata_valid<3) { fprintf(stderr,"sand_align_kernel: sequence %s did not indicate start positions for the banded alignment.\n",s2->name); exit(1); } /* The width of the band is proportional to the desired quality of the match. */ int k = 2 + min_qual * MIN(s1->num_bases,s2->num_bases) / 2.0; if(k<5) k = 5; aln = align_banded(m,s1->data, s2->data, start1, start2, k); } else { fprintf(stderr,"unknown alignment type: %s\n",align_type); exit(1); } aln->ori = ori; if(aln->quality <= min_qual) { if(!strcmp(output_format,"ovl")) { overlap_write_v5(stdout, aln, s1->name, s2->name); } else if(!strcmp(output_format, "ovl_new")) { overlap_write_v7(stdout, aln, s1->name, s2->name); } else if(!strcmp(output_format,"matrix")) { printf("*** %s alignment of sequences %s and %s (quality %lf):\n\n",align_type,s1->name,s2->name,aln->quality); matrix_print(m,s1->data,s2->data); } else if(!strcmp(output_format,"align")) { printf("*** %s alignment of sequences %s and %s (quality %lf):\n\n",align_type,s1->name,s2->name,aln->quality); alignment_print(stdout,s1->data,s2->data,aln); } else { printf("unknown output format '%s'\n",output_format); exit(1); } } matrix_delete(m); seq_free(s2); alignment_delete(aln); } seq_free(s1); } fclose(input); if(!strcmp(output_format,"ovl") || !strcmp(output_format, "ovl_new")) { overlap_write_end(stdout); } if ((argc - optind) == 1 && del_input == 1) { remove(argv[fileindex]); } return 0; }
void load_sequences(const char *filename) { FILE *file; int i, count, rect_id, rectangle_count; struct cseq *c; size_t size; rectangle_count = 256; rectangle_sizes = malloc(rectangle_count * sizeof(size_t)); file = fopen(filename, "r"); if(!file) fatal("couldn't open %s: %s\n", filename, strerror(errno)); debug(D_DEBUG, "rectangle size: %d\n", rectangle_size); sequences = malloc(rectangle_size * sizeof(struct cseq *)); if(!sequences) fatal("No enough memory to hold %d sequences. (%s) \n", rectangle_size, strerror(errno)); count = 0; rect_id = 0; while(1) { c = cseq_read(file); if(!c) { if(count != rectangle_size && count > 0) { // write the last rectangle to file size = load_rectangle_to_file(rect_id, sequences, count); if(!size) fatal("Failed to write rectangle %d to file. (%s)\n", rect_id, strerror(errno)); rectangle_sizes[rect_id] = size; rect_id++; for(i = 0; i < count; i++) cseq_free(sequences[i]); debug(D_DEBUG, "Rectangle %d has been created.\n", rect_id - 1); } num_rectangles = rect_id; break; } sequences[count] = c; count++; num_seqs++; if(count == rectangle_size) { size = load_rectangle_to_file(rect_id, sequences, count); if(!size) fatal("Failed to write rectangle %d to file. (%s)\n", rect_id, strerror(errno)); rectangle_sizes[rect_id] = size; rect_id++; if(rect_id == rectangle_count) { rectangle_count = rectangle_count * 2; rectangle_sizes = realloc(rectangle_sizes, rectangle_count * sizeof(size_t)); if(!rectangle_sizes) fatal("Failed to allocate memory for holding rectangle sizes. Number of rectangles: %d. (%s)\n", rectangle_count, strerror(errno)); } for(i = 0; i < count; i++) cseq_free(sequences[i]); count = 0; debug(D_DEBUG, "Rectangle %d has been created.\n", rect_id - 1); } } fclose(file); free(sequences); }
int main(int argc, char ** argv) { const char *progname = "sand_compress_reads"; FILE * infile; FILE * outfile; int quiet_mode = 0; struct seq *s; struct cseq *c; signed char d; int clip = 0; int internal = 0; char tmp_id[128]; int count = 0; while((d=getopt(argc,argv,"cvqhi")) > -1) { switch(d) { case 'c': clip = 1; break; case 'i': internal = 1; break; case 'q': quiet_mode = 1; break; case 'v': cctools_version_print(stdout, progname); exit(0); break; case 'h': default: show_help(progname); exit(0); break; } } cctools_version_debug(D_DEBUG, argv[0]); if( optind<argc ) { infile = fopen(argv[optind], "r"); if(!infile) { fprintf(stderr,"%s: couldn't open %s: %s\n",progname,argv[optind],strerror(errno)); return 1; } optind++; } else { infile = stdin; } if( optind<argc ) { outfile = fopen(argv[optind],"w"); if(!outfile) { fprintf(stderr,"%s: couldn't open %s: %s\n",progname,argv[optind],strerror(errno)); return 1; } optind++; } else { outfile = stdout; } while((s = seq_read(infile))) { if(clip != 0 || internal != 0){ strcpy(tmp_id, s->name); strcpy(s->name, strtok(tmp_id,",")); if(internal != 0){ strcpy(s->name, strtok(NULL,",")); } } c = seq_compress(s); cseq_write(outfile,c); cseq_free(c); seq_free(s); count++; } if(!quiet_mode) { fprintf(stderr,"%d sequences compressed.\n",count); } fclose(infile); fclose(outfile); return 0; }