/*
 * Entry point of sand_align_kernel.
 *
 * Reads compressed sequences from the file named on the command line (or
 * from stdin when no file is given).  The input is a series of lists: the
 * first sequence of each list is aligned against every following sequence
 * until cseq_read() returns null, which ends the list; a second null ends
 * the whole run.
 *
 * Options handled here:
 *   -a <type>    alignment type: "sw", "ps" or "banded"
 *   -o <format>  output format: "ovl", "ovl_new", "matrix" or "align"
 *   -m <n>       min_align, passed to the prefix/suffix aligner
 *   -q <x>       min_qual; only alignments with quality <= min_qual are written
 *   -x           remove the input file after a successful run
 *   -d <flag>    enable a debug flag
 *   -v           print version and exit
 *   -h           print help and exit
 * The option string also accepts "-k <arg>", but there is no handler for it,
 * so it falls through to the help text like any unrecognised option.
 *
 * Returns 0 on success; exits with status 1 on any fatal error.
 */
int main(int argc, char ** argv)
{
    FILE * input;
    struct seq *s1=0, *s2=0;
    char ori;
    int c;                      /* getopt() returns int; -1 marks the end of the options */
    int fileindex;
    int del_input=0;

    while((c = getopt(argc, argv, "a:o:k:m:q:xd:vh")) != -1) {
        switch (c) {
        case 'a':
            align_type = optarg;
            break;
        case 'o':
            output_format = optarg;
            break;
        case 'm':
            min_align = atoi(optarg);
            break;
        case 'q':
            min_qual = atof(optarg);
            break;
        case 'x':
            del_input = 1;
            break;
        case 'd':
            debug_flags_set(optarg);
            break;
        case 'v':
            cctools_version_print(stdout, argv[0]);
            exit(0);
            break;
        default:
        case 'h':
            show_help(argv[0]);
            exit(0);
            break;
        }
    }

    cctools_version_debug(D_DEBUG, argv[0]);

    /* Exactly one positional argument selects an input file; otherwise read stdin. */
    fileindex = optind;
    if ((argc - optind) == 1) {
        input = fopen(argv[fileindex], "r");
        if (!input) {
            fprintf(stderr, "sand_align_kernel: couldn't open %s: %s\n",argv[fileindex],strerror(errno));
            exit(1);
        }
    } else {
        input = stdin;
    }

    struct cseq *c1, *c2;

    if(!strcmp(output_format,"ovl") || !strcmp(output_format, "ovl_new")) {
        overlap_write_begin(stdout);
    }

    // outer loop: read first sequence in comparison list

    while((c1=cseq_read(input))) {
        s1 = cseq_uncompress(c1);
        cseq_free(c1);

        // inner loop: read sequences until null (indicating end of list)
        // then continue again with outer loop.  (two nulls to halt.)

        while((c2=cseq_read(input))) {
            s2 = cseq_uncompress(c2);
            cseq_free(c2);

            int dir = 0;
            int start1 = 0;
            int start2 = 0;
            int metadata_valid = 0;

            /* strtok() modifies its argument, so tokenise a private copy. */
            char *tmp = strdup(s2->metadata);
            if(!tmp) {
                fprintf(stderr,"sand_align_kernel: out of memory when parsing metadata of sequence %s.\n",s2->name);
                exit(1);
            }

            /*
             * The metadata is a tab/space separated list of integers.  Each
             * new token shifts the previous values down, so after the loop
             * the last three tokens are held in dir, start1 and start2 (in
             * that order), and metadata_valid counts the tokens seen.
             * Empty metadata yields no tokens and leaves everything at 0.
             */
            for(char *token = strtok(tmp, "\t "); token; token = strtok(NULL, "\t ")) {
                dir = start1;
                start1 = start2;
                start2 = atoi(token);
                metadata_valid++;
            }
            free(tmp);

            /* A direction of -1 asks for the second sequence to be reverse-complemented. */
            if(metadata_valid>=1 && dir==-1) {
                seq_reverse_complement(s2);
                ori = 'I';
            } else {
                ori = 'N';
            }

            struct matrix *m = matrix_create(s1->num_bases,s2->num_bases);
            if(!m) {
                fprintf(stderr,"sand_align_kernel: out of memory when creating alignment matrix.\n");
                exit(1);
            }

            struct alignment *aln;

            if(!strcmp(align_type,"sw")) {

                aln = align_smith_waterman(m,s1->data,s2->data);

            } else if(!strcmp(align_type,"ps")) {

                aln = align_prefix_suffix(m,s1->data,s2->data, min_align);

            } else if(!strcmp(align_type,"banded")) {
                /* Banded alignment needs all three metadata values (dir, start1, start2). */
                if(metadata_valid<3) {
                    fprintf(stderr,"sand_align_kernel: sequence %s did not indicate start positions for the banded alignment.\n",s2->name);
                    exit(1);
                }

                /* The width of the band is proportional to the desired quality of the match. */

                int k = 2 + min_qual * MIN(s1->num_bases,s2->num_bases) / 2.0;
                if(k<5) k = 5;

                aln = align_banded(m,s1->data, s2->data, start1, start2, k);
            } else {
                fprintf(stderr,"unknown alignment type: %s\n",align_type);
                exit(1);
            }

            aln->ori = ori;

            /* Only alignments whose quality value does not exceed min_qual are reported. */
            if(aln->quality <= min_qual) {
                if(!strcmp(output_format,"ovl")) {
                    overlap_write_v5(stdout, aln, s1->name, s2->name);
                } else if(!strcmp(output_format, "ovl_new")) {
                    overlap_write_v7(stdout, aln, s1->name, s2->name);
                } else if(!strcmp(output_format,"matrix")) {
                    printf("*** %s alignment of sequences %s and %s (quality %lf):\n\n",align_type,s1->name,s2->name,aln->quality);
                    matrix_print(m,s1->data,s2->data);
                } else if(!strcmp(output_format,"align")) {
                    printf("*** %s alignment of sequences %s and %s (quality %lf):\n\n",align_type,s1->name,s2->name,aln->quality);
                    alignment_print(stdout,s1->data,s2->data,aln);
                } else {
                    printf("unknown output format '%s'\n",output_format);
                    exit(1);
                }
            }

            matrix_delete(m);
            seq_free(s2);
            alignment_delete(aln);
        }
        seq_free(s1);
    }

    fclose(input);

    if(!strcmp(output_format,"ovl") || !strcmp(output_format, "ovl_new")) {
        overlap_write_end(stdout);
    }

    /* -x: delete the input file, but only when one was actually named. */
    if ((argc - optind) == 1 && del_input == 1)
    {
        remove(argv[fileindex]);
    }
    return 0;
}
Example #2
0
/*
 * Read every compressed sequence from `filename` and write them out in
 * groups ("rectangles") of up to rectangle_size sequences each via
 * load_rectangle_to_file().
 *
 * Side effects on file-level state:
 *   - rectangle_sizes is allocated (and grown as needed); entry i holds the
 *     size reported by load_rectangle_to_file() for rectangle i.
 *   - num_rectangles is set to the number of rectangles written.
 *   - num_seqs is incremented once per sequence read.
 *   - sequences is used as a scratch buffer and freed before returning.
 *
 * Every failure is reported through fatal(); the code after each check
 * relies on fatal() not returning.
 */
void load_sequences(const char *filename)
{
    FILE *file;
    int i, count, rect_id, rectangle_count;
    struct cseq *c;
    size_t size;

    /* Initial capacity of the size table; doubled whenever it fills up. */
    rectangle_count = 256;
    rectangle_sizes = malloc(rectangle_count * sizeof(size_t));
    if(!rectangle_sizes)
        fatal("Failed to allocate memory for holding rectangle sizes. Number of rectangles: %d. (%s)\n", rectangle_count, strerror(errno));

    file = fopen(filename, "r");
    if(!file)
        fatal("couldn't open %s: %s\n", filename, strerror(errno));

    debug(D_DEBUG, "rectangle size: %d\n", rectangle_size);
    sequences = malloc(rectangle_size * sizeof(struct cseq *));
    if(!sequences)
        fatal("No enough memory to hold %d sequences. (%s) \n", rectangle_size, strerror(errno));


    count = 0;      /* sequences buffered for the current rectangle */
    rect_id = 0;    /* index of the next rectangle to write */
    while(1) {
        c = cseq_read(file);
        if(!c) {
            /* End of input: flush a partially filled rectangle, if any. */
            if(count != rectangle_size && count > 0) {	// write the last rectangle to file
                size = load_rectangle_to_file(rect_id, sequences, count);
                if(!size)
                    fatal("Failed to write rectangle %d to file. (%s)\n", rect_id, strerror(errno));
                rectangle_sizes[rect_id] = size;
                rect_id++;
                for(i = 0; i < count; i++)
                    cseq_free(sequences[i]);
                debug(D_DEBUG, "Rectangle %d has been created.\n", rect_id - 1);
            }

            num_rectangles = rect_id;
            break;
        }
        sequences[count] = c;
        count++;
        num_seqs++;

        /* Buffer full: write the rectangle and start a new one. */
        if(count == rectangle_size) {
            size = load_rectangle_to_file(rect_id, sequences, count);
            if(!size)
                fatal("Failed to write rectangle %d to file. (%s)\n", rect_id, strerror(errno));
            rectangle_sizes[rect_id] = size;
            rect_id++;
            /* Grow the table before the next index would fall outside it. */
            if(rect_id == rectangle_count) {
                rectangle_count = rectangle_count * 2;
                rectangle_sizes = realloc(rectangle_sizes, rectangle_count * sizeof(size_t));
                if(!rectangle_sizes)
                    fatal("Failed to allocate memory for holding rectangle sizes. Number of rectangles: %d. (%s)\n", rectangle_count, strerror(errno));
            }
            for(i = 0; i < count; i++)
                cseq_free(sequences[i]);
            count = 0;
            debug(D_DEBUG, "Rectangle %d has been created.\n", rect_id - 1);
        }
    }

    fclose(file);
    free(sequences);
}
Example #3
0
/*
 * Reduce a comma-separated sequence name to a single field, in place.
 *
 * Field splitting follows strtok() rules with "," as the delimiter: leading
 * commas are skipped and runs of commas count as one separator.
 *
 *   want_second == 0: keep the first field.
 *   want_second != 0: keep the second field; if there is none, the first
 *                     field is kept instead.
 *
 * A null name, or a name with no field at all (empty or only commas), is
 * left unchanged.  The result is never longer than the input, so no extra
 * storage is needed.
 */
static void select_name_field(char *name, int want_second)
{
	if(!name) {
		return;
	}

	char *chosen = name + strspn(name, ",");
	size_t chosen_len = strcspn(chosen, ",");

	if(chosen_len == 0) {
		return;
	}

	if(want_second) {
		char *second = chosen + chosen_len;
		second += strspn(second, ",");
		size_t second_len = strcspn(second, ",");
		if(second_len > 0) {
			chosen = second;
			chosen_len = second_len;
		}
	}

	/* Source and destination overlap, so memmove() is required. */
	memmove(name, chosen, chosen_len);
	name[chosen_len] = '\0';
}

/*
 * Entry point of sand_compress_reads.
 *
 * Reads sequences with seq_read() from the first positional argument (or
 * stdin), compresses each one with seq_compress(), and writes it with
 * cseq_write() to the second positional argument (or stdout).
 *
 * Options:
 *   -c  keep only the first comma-separated field of each sequence name
 *   -i  keep only the second comma-separated field of each sequence name
 *   -q  do not print the final sequence count
 *   -v  print version and exit
 *   -h  print help and exit (also used for unrecognised options)
 *
 * Returns 0 on success, 1 if a file cannot be opened or the output cannot
 * be completely written.
 */
int main(int argc, char ** argv)
{
	const char *progname = "sand_compress_reads";
	FILE * infile;
	FILE * outfile;
	int quiet_mode = 0;
	struct seq *s;
	struct cseq *c;
	int d;                  /* getopt() returns int; -1 marks the end of the options */
	int clip = 0;
	int internal = 0;
	int count = 0;

	while((d=getopt(argc,argv,"cvqhi")) != -1) {
		switch(d) {
		case 'c':
			clip = 1;
			break;
		case 'i':
			internal = 1;
			break;
		case 'q':
			quiet_mode = 1;
			break;
		case 'v':
			cctools_version_print(stdout, progname);
			exit(0);
			break;
		case 'h':
		default:
			show_help(progname);
			exit(0);
			break;
		}
	}

	cctools_version_debug(D_DEBUG, argv[0]);

	/* First positional argument, if present, is the input file. */
	if( optind<argc ) {
		infile = fopen(argv[optind], "r");
		if(!infile) {
			fprintf(stderr,"%s: couldn't open %s: %s\n",progname,argv[optind],strerror(errno));
			return 1;
		}
		optind++;
	} else {
		infile = stdin;
	}

	/* Second positional argument, if present, is the output file. */
	if( optind<argc ) {
		outfile = fopen(argv[optind],"w");
		if(!outfile) {
			fprintf(stderr,"%s: couldn't open %s: %s\n",progname,argv[optind],strerror(errno));
			return 1;
		}
		optind++;
	} else {
		outfile = stdout;
	}

	while((s = seq_read(infile))) {
		if(clip != 0 || internal != 0){
			/* -i takes precedence over -c: it selects the second field. */
			select_name_field(s->name, internal != 0);
		}

		c = seq_compress(s);
		cseq_write(outfile,c);
		cseq_free(c);
		seq_free(s);
		count++;
	}

	if(!quiet_mode) {
		fprintf(stderr,"%d sequences compressed.\n",count);
	}

	fclose(infile);

	/* fclose() flushes buffered output; a failure here means data was lost. */
	if(fclose(outfile) != 0) {
		fprintf(stderr,"%s: error writing output: %s\n",progname,strerror(errno));
		return 1;
	}

	return 0;
}