Exemplo n.º 1
0
Arquivo: faidx.c Projeto: pkrusche/vt
char *faidx_fetch_seq(const faidx_t *fai, const char *c_name, int p_beg_i, int p_end_i, int *len)
{
	int l;
	char c;
    khiter_t iter;
    faidx1_t val;
	char *seq=NULL;

    // Adjust position
    iter = kh_get(s, fai->hash, c_name);
    if(iter == kh_end(fai->hash)) return 0;
    val = kh_value(fai->hash, iter);
	if(p_end_i < p_beg_i) p_beg_i = p_end_i;
    if(p_beg_i < 0) p_beg_i = 0;
    else if(val.len <= p_beg_i) p_beg_i = val.len - 1;
    if(p_end_i < 0) p_end_i = 0;
    else if(val.len <= p_end_i) p_end_i = val.len - 1;

    // Now retrieve the sequence 
	int ret = bgzf_useek(fai->bgzf, val.offset + p_beg_i / val.line_blen * val.line_len + p_beg_i % val.line_blen, SEEK_SET);
    if ( ret<0 )
    {
        *len = -1;
        fprintf(stderr, "[fai_fetch_seq] Error: fai_fetch failed. (Seeking in a compressed, .gzi unindexed, file?)\n");
        return NULL;
    }
	l = 0;
	seq = (char*)malloc(p_end_i - p_beg_i + 2);
	while ( (c=bgzf_getc(fai->bgzf))>=0 && l < p_end_i - p_beg_i + 1)
		if (isgraph(c)) seq[l++] = c;
	seq[l] = '\0';
	*len = l;
	return seq;
}
Exemplo n.º 2
0
char *fai_fetch(const faidx_t *fai, const char *str, int *len)
{
    char *s;
    int c, i, l, k, name_end;
    khiter_t iter;
    faidx1_t val;
    khash_t(s) *h;
    int beg, end;

    beg = end = -1;
    h = fai->hash;
    name_end = l = strlen(str);
    s = (char*)malloc(l+1);
    // remove space
    for (i = k = 0; i < l; ++i)
        if (!isspace(str[i])) s[k++] = str[i];
    s[k] = 0; l = k;
    // determine the sequence name
    for (i = l - 1; i >= 0; --i) if (s[i] == ':') break; // look for colon from the end
    if (i >= 0) name_end = i;
    if (name_end < l) { // check if this is really the end
        int n_hyphen = 0;
        for (i = name_end + 1; i < l; ++i) {
            if (s[i] == '-') ++n_hyphen;
            else if (!isdigit(s[i]) && s[i] != ',') break;
        }
        if (i < l || n_hyphen > 1) name_end = l; // malformated region string; then take str as the name
        s[name_end] = 0;
        iter = kh_get(s, h, s);
        if (iter == kh_end(h)) { // cannot find the sequence name
            iter = kh_get(s, h, str); // try str as the name
            if (iter == kh_end(h)) {
                *len = 0;
            free(s); return 0;
            } else s[name_end] = ':', name_end = l;
        }
    } else iter = kh_get(s, h, str);
    if(iter == kh_end(h)) {
        fprintf(stderr, "[fai_fetch] Warning - Reference %s not found in FASTA file, returning empty sequence\n", str);
        free(s);
        *len = -2;
        return 0;
    };
    val = kh_value(h, iter);
    // parse the interval
    if (name_end < l) {
        for (i = k = name_end + 1; i < l; ++i)
            if (s[i] != ',') s[k++] = s[i];
        s[k] = 0;
        beg = atoi(s + name_end + 1);
        for (i = name_end + 1; i != k; ++i) if (s[i] == '-') break;
        end = i < k? atoi(s + i + 1) : val.len;
        if (beg > 0) --beg;
    } else beg = 0, end = val.len;
    if (beg >= val.len) beg = val.len;
    if (end >= val.len) end = val.len;
    if (beg > end) beg = end;
    free(s);

    // now retrieve the sequence
    int ret = bgzf_useek(fai->bgzf, val.offset + beg / val.line_blen * val.line_len + beg % val.line_blen, SEEK_SET);
    if ( ret<0 )
    {
        *len = -1;
        fprintf(stderr, "[fai_fetch] Error: fai_fetch failed. (Seeking in a compressed, .gzi unindexed, file?)\n");
        return NULL;
    }
    l = 0;
    s = (char*)malloc(end - beg + 2);
    while ( (c=bgzf_getc(fai->bgzf))>=0 && l < end - beg )
        if (isgraph(c)) s[l++] = c;
    s[l] = '\0';
    *len = l;
    return s;
}
Exemplo n.º 3
0
Arquivo: bgzip.c Projeto: Illumina/akt
int main(int argc, char **argv)
{
    int c, compress, pstdout, is_forced, index = 0, rebgzip = 0, reindex = 0;
    BGZF *fp;
    void *buffer;
    long start, end, size;
    char *index_fname = NULL;
    int threads = 1;

    static const struct option loptions[] =
    {
        {"help", no_argument, NULL, 'h'},
        {"offset", required_argument, NULL, 'b'},
        {"stdout", no_argument, NULL, 'c'},
        {"decompress", no_argument, NULL, 'd'},
        {"force", no_argument, NULL, 'f'},
        {"index", no_argument, NULL, 'i'},
        {"index-name", required_argument, NULL, 'I'},
        {"reindex", no_argument, NULL, 'r'},
        {"rebgzip",no_argument,NULL,'g'},
        {"size", required_argument, NULL, 's'},
        {"threads", required_argument, NULL, '@'},
        {"version", no_argument, NULL, 1},
        {NULL, 0, NULL, 0}
    };

    compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0;
    while((c  = getopt_long(argc, argv, "cdh?fb:@:s:iI:gr",loptions,NULL)) >= 0){
        switch(c){
        case 'd': compress = 0; break;
        case 'c': pstdout = 1; break;
        case 'b': start = atol(optarg); compress = 0; pstdout = 1; break;
        case 's': size = atol(optarg); pstdout = 1; break;
        case 'f': is_forced = 1; break;
        case 'i': index = 1; break;
        case 'I': index_fname = optarg; break;
        case 'g': rebgzip = 1; break;
        case 'r': reindex = 1; compress = 0; break;
        case '@': threads = atoi(optarg); break;
        case 1:
            printf(
"bgzip (htslib) %s\n"
"Copyright (C) 2017 Genome Research Ltd.\n", hts_version());
            return EXIT_SUCCESS;
        case 'h':
        case '?': return bgzip_main_usage();
        }
    }
    if (size >= 0) end = start + size;
    if (end >= 0 && end < start) {
        fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end);
        return 1;
    }
    if (compress == 1) {
        struct stat sbuf;
        int f_src = fileno(stdin);

        if ( argc>optind )
        {
            if ( stat(argv[optind],&sbuf)<0 )
            {
                fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
                return 1;
            }

            if ((f_src = open(argv[optind], O_RDONLY)) < 0) {
                fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
                return 1;
            }

            if (pstdout)
                fp = bgzf_open("-", "w");
            else
            {
                char *name = malloc(strlen(argv[optind]) + 5);
                strcpy(name, argv[optind]);
                strcat(name, ".gz");
                fp = bgzf_open(name, is_forced? "w" : "wx");
                if (fp == NULL && errno == EEXIST && confirm_overwrite(name))
                    fp = bgzf_open(name, "w");
                if (fp == NULL) {
                    fprintf(stderr, "[bgzip] can't create %s: %s\n", name, strerror(errno));
                    free(name);
                    return 1;
                }
                free(name);
            }
        }
        else if (!pstdout && isatty(fileno((FILE *)stdout)) )
            return bgzip_main_usage();
        else if ( index && !index_fname )
        {
            fprintf(stderr, "[bgzip] Index file name expected when writing to stdout\n");
            return 1;
        }
        else
            fp = bgzf_open("-", "w");

        if ( index && rebgzip )
        {
            fprintf(stderr, "[bgzip] Can't produce a index and rebgzip simultaneously\n");
            return 1;
        }

        if ( rebgzip && !index_fname )
        {
            fprintf(stderr, "[bgzip] Index file name expected when writing to stdout\n");
            return 1;
        }

        if (threads > 1)
            bgzf_mt(fp, threads, 256);

        if ( index ) bgzf_index_build_init(fp);
        buffer = malloc(WINDOW_SIZE);
#ifdef _WIN32
        _setmode(f_src, O_BINARY);
#endif
        if (rebgzip){
            if ( bgzf_index_load(fp, index_fname, NULL) < 0 ) error("Could not load index: %s.gzi\n", argv[optind]);

            while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0)
                if (bgzf_block_write(fp, buffer, c) < 0) error("Could not write %d bytes: Error %d\n", c, fp->errcode);
        }
        else {
            while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0)
                if (bgzf_write(fp, buffer, c) < 0) error("Could not write %d bytes: Error %d\n", c, fp->errcode);
        }
        if ( index )
        {
            if (index_fname) {
                if (bgzf_index_dump(fp, index_fname, NULL) < 0)
                    error("Could not write index to '%s'\n", index_fname);
            } else {
                if (bgzf_index_dump(fp, argv[optind], ".gz.gzi") < 0)
                    error("Could not write index to '%s.gz.gzi'", argv[optind]);
            }
        }
        if (bgzf_close(fp) < 0) error("Close failed: Error %d", fp->errcode);
        if (argc > optind && !pstdout) unlink(argv[optind]);
        free(buffer);
        close(f_src);
        return 0;
    }
    else if ( reindex )
    {
        if ( argc>optind )
        {
            fp = bgzf_open(argv[optind], "r");
            if ( !fp ) error("[bgzip] Could not open file: %s\n", argv[optind]);
        }
        else
        {
            if ( !index_fname ) error("[bgzip] Index file name expected when reading from stdin\n");
            fp = bgzf_open("-", "r");
            if ( !fp ) error("[bgzip] Could not read from stdin: %s\n", strerror(errno));
        }

        buffer = malloc(BGZF_BLOCK_SIZE);
        bgzf_index_build_init(fp);
        int ret;
        while ( (ret=bgzf_read(fp, buffer, BGZF_BLOCK_SIZE))>0 ) ;
        free(buffer);
        if ( ret<0 ) error("Is the file gzipped or bgzipped? The latter is required for indexing.\n");

        if ( index_fname ) {
            if (bgzf_index_dump(fp, index_fname, NULL) < 0)
                error("Could not write index to '%s'\n", index_fname);
        } else {
            if (bgzf_index_dump(fp, argv[optind], ".gzi") < 0)
                error("Could not write index to '%s.gzi'\n", argv[optind]);
        }

        if ( bgzf_close(fp)<0 ) error("Close failed: Error %d\n",fp->errcode);
        return 0;
    }
    else
    {
        struct stat sbuf;
        int f_dst;

        if ( argc>optind )
        {
            if ( stat(argv[optind],&sbuf)<0 )
            {
                fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
                return 1;
            }
            char *name;
            int len = strlen(argv[optind]);
            if ( strcmp(argv[optind]+len-3,".gz") )
            {
                fprintf(stderr, "[bgzip] %s: unknown suffix -- ignored\n", argv[optind]);
                return 1;
            }
            fp = bgzf_open(argv[optind], "r");
            if (fp == NULL) {
                fprintf(stderr, "[bgzip] Could not open file: %s\n", argv[optind]);
                return 1;
            }

            if (pstdout) {
                f_dst = fileno(stdout);
            }
            else {
                const int wrflags = O_WRONLY | O_CREAT | O_TRUNC;
                name = strdup(argv[optind]);
                name[strlen(name) - 3] = '\0';
                f_dst = open(name, is_forced? wrflags : wrflags|O_EXCL, 0666);
                if (f_dst < 0 && errno == EEXIST && confirm_overwrite(name))
                    f_dst = open(name, wrflags, 0666);
                if (f_dst < 0) {
                    fprintf(stderr, "[bgzip] can't create %s: %s\n", name, strerror(errno));
                    free(name);
                    return 1;
                }
                free(name);
            }
        }
        else if (!pstdout && isatty(fileno((FILE *)stdin)) )
            return bgzip_main_usage();
        else
        {
            f_dst = fileno(stdout);
            fp = bgzf_open("-", "r");
            if (fp == NULL) {
                fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno));
                return 1;
            }
        }
        if (threads > 1)
            bgzf_mt(fp, threads, 256);

        buffer = malloc(WINDOW_SIZE);
        if ( start>0 )
        {
            if ( bgzf_index_load(fp, argv[optind], ".gzi") < 0 ) error("Could not load index: %s.gzi\n", argv[optind]);
            if ( bgzf_useek(fp, start, SEEK_SET) < 0 ) error("Could not seek to %d-th (uncompressd) byte\n", start);
        }
#ifdef _WIN32
        _setmode(f_dst, O_BINARY);
#endif
        while (1) {
            if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE);
            else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start));
            if (c == 0) break;
            if (c < 0) error("Could not read %d bytes: Error %d\n", (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start), fp->errcode);
            start += c;
            if ( write(f_dst, buffer, c) != c ) {
#ifdef _WIN32
                if (GetLastError() != ERROR_NO_DATA)
#endif
                error("Could not write %d bytes\n", c);
            }
            if (end >= 0 && start >= end) break;
        }
        free(buffer);
        if (bgzf_close(fp) < 0) error("Close failed: Error %d\n",fp->errcode);
        if (!pstdout) unlink(argv[optind]);
        return 0;
    }
}
Exemplo n.º 4
0
Arquivo: bgzip.c Projeto: Abdul59/STAR
int main(int argc, char **argv)
{
	int c, compress, pstdout, is_forced, index = 0, reindex = 0;
	BGZF *fp;
	void *buffer;
	long start, end, size;
    char *index_fname = NULL;

    static struct option loptions[] = 
    {
        {"help",0,0,'h'},
        {"offset",1,0,'b'},
        {"stdout",0,0,'c'},
        {"decompress",0,0,'d'},
        {"force",0,0,'f'},
        {"index",0,0,'i'},
        {"index-name",1,0,'I'},
        {"reindex",0,0,'r'},
        {"size",1,0,'s'},
        {0,0,0,0}
    };

	compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0;
	while((c  = getopt_long(argc, argv, "cdh?fb:s:iI:r",loptions,NULL)) >= 0){
		switch(c){
		case 'd': compress = 0; break;
		case 'c': pstdout = 1; break;
		case 'b': start = atol(optarg); compress = 0; pstdout = 1; break;
		case 's': size = atol(optarg); pstdout = 1; break;
		case 'f': is_forced = 1; break;
        case 'i': index = 1; break;
        case 'I': index_fname = optarg; break;
        case 'r': reindex = 1; compress = 0; break;
		case 'h': 
        case '?': return bgzip_main_usage();
		}
	}
	if (size >= 0) end = start + size;
	if (end >= 0 && end < start) {
		fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end);
		return 1;
	}
	if (compress == 1) {
		struct stat sbuf;
		int f_src = fileno(stdin);
		int f_dst = fileno(stdout);

		if ( argc>optind )
		{
			if ( stat(argv[optind],&sbuf)<0 ) 
			{ 
				fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
				return 1; 
			}

			if ((f_src = open(argv[optind], O_RDONLY)) < 0) {
				fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
				return 1;
			}

			if (pstdout)
				f_dst = fileno(stdout);
			else
			{
				char *name = malloc(strlen(argv[optind]) + 5);
				strcpy(name, argv[optind]);
				strcat(name, ".gz");
				f_dst = write_open(name, is_forced);
				if (f_dst < 0) return 1;
				free(name);
			}
		}
		else if (!pstdout && isatty(fileno((FILE *)stdout)) )
			return bgzip_main_usage();
        else if ( index && !index_fname )
        {
            fprintf(stderr, "[bgzip] Index file name expected when writing to stdout\n");
            return 1;
        }

		fp = bgzf_fdopen(f_dst, "w");
        if ( index ) bgzf_index_build_init(fp);
		buffer = malloc(WINDOW_SIZE);
		while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0)
			if (bgzf_write(fp, buffer, c) < 0) error("Could not write %d bytes: Error %d\n", c, fp->errcode);
		// f_dst will be closed here
        if ( index ) 
        {
            if ( index_fname ) bgzf_index_dump(fp, index_fname, NULL);
            else bgzf_index_dump(fp, argv[optind], ".gz.gzi");
        }
		if (bgzf_close(fp) < 0) error("Close failed: Error %d", fp->errcode);
		if (argc > optind && !pstdout) unlink(argv[optind]);
		free(buffer);
		close(f_src);
		return 0;
	}
    else if ( reindex )
    {
        if ( argc>optind )
        {
			fp = bgzf_open(argv[optind], "r");
            if ( !fp ) error("[bgzip] Could not open file: %s\n", argv[optind]);
        }
        else
        {
            if ( !index_fname ) error("[bgzip] Index file name expected when reading from stdin\n");
            fp = bgzf_fdopen(fileno(stdin), "r");
        	if ( !fp ) error("[bgzip] Could not read from stdin: %s\n", strerror(errno));
        }

        buffer = malloc(BGZF_BLOCK_SIZE);
        bgzf_index_build_init(fp);
        int ret;
        while ( (ret=bgzf_read(fp, buffer, BGZF_BLOCK_SIZE))>0 ) ;
        free(buffer);
        if ( ret<0 ) error("Is the file gzipped or bgzipped? The latter is required for indexing.\n");
 
        if ( index_fname )
            bgzf_index_dump(fp, index_fname, NULL);
        else 
            bgzf_index_dump(fp, argv[optind], ".gzi");

        if ( bgzf_close(fp)<0 ) error("Close failed: Error %d\n",fp->errcode);
        return 0;
    }
    else
    {
		struct stat sbuf;
		int f_dst;

		if ( argc>optind )
		{
			if ( stat(argv[optind],&sbuf)<0 )
			{
				fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
				return 1;
			}
			char *name;
			int len = strlen(argv[optind]);
			if ( strcmp(argv[optind]+len-3,".gz") )
			{
				fprintf(stderr, "[bgzip] %s: unknown suffix -- ignored\n", argv[optind]);
				return 1;
			}
			fp = bgzf_open(argv[optind], "r");
			if (fp == NULL) {
				fprintf(stderr, "[bgzip] Could not open file: %s\n", argv[optind]);
				return 1;
			}

			if (pstdout) {
				f_dst = fileno(stdout);
			}
			else {
				name = strdup(argv[optind]);
				name[strlen(name) - 3] = '\0';
				f_dst = write_open(name, is_forced);
				free(name);
			}
		}
		else if (!pstdout && isatty(fileno((FILE *)stdin)) )
			return bgzip_main_usage();
		else
		{
			f_dst = fileno(stdout);
			fp = bgzf_fdopen(fileno(stdin), "r");
			if (fp == NULL) {
				fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno));
				return 1;
			}
		}
        buffer = malloc(WINDOW_SIZE);
        if ( start>0 )
        {
            if ( bgzf_index_load(fp, argv[optind], ".gzi") < 0 ) error("Could not load index: %s.gzi\n", argv[optind]);
            if ( bgzf_useek(fp, start, SEEK_SET) < 0 ) error("Could not seek to %d-th (uncompressd) byte\n", start);
        }
        while (1) {
            if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE);
            else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start));
            if (c == 0) break;
            if (c < 0) error("Could not read %d bytes: Error %d\n", (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start), fp->errcode);
            start += c;
            if ( write(f_dst, buffer, c) != c ) error("Could not write %d bytes\n", c);
            if (end >= 0 && start >= end) break;
        }
        free(buffer);
        if (bgzf_close(fp) < 0) error("Close failed: Error %d\n",fp->errcode);
        if (!pstdout) unlink(argv[optind]);
        return 0;
	}
    return 0;
}