コード例 #1
0
/// Advance the streamer to the next line of the tabix query.
///
/// Nothing is read when the stream has already ended or when either the
/// tabix handle or the iterator is null. Otherwise the pointer returned by
/// ti_read() is stored in _linebuf: a null result marks the end of the
/// stream, any other result marks a record as set and bumps _record_no.
///
/// @return true when a new record is available, false otherwise.
bool
tabix_streamer::
next() {
    if (_is_stream_end) return false;
    if ((NULL == _tfp) || (NULL == _titer)) return false;

    int line_length;  // filled in by ti_read(); not consulted afterwards
    _linebuf = (char*) ti_read(_tfp, _titer, &line_length);

    if (NULL == _linebuf) {
        // Iterator exhausted: remember it so later calls return immediately.
        _is_stream_end = true;
        _is_record_set = false;
    } else {
        _is_stream_end = false;
        _is_record_set = true;
        _record_no++;
    }

    return _is_record_set;
}
コード例 #2
0
ファイル: tabixmodule.c プロジェクト: AngieHinrichs/samtabix
static PyObject *
tabixiter_iternext(TabixIteratorObject *self)
{
    const char *chunk;
    int len, i;

    chunk = ti_read(self->tbobj->tb, self->iter, &len);
    if (chunk != NULL) {
        PyObject *ret, *column;
        Py_ssize_t colidx;
        const char *ptr, *begin;

        ret = PyList_New(0);
        if (ret == NULL)
            return NULL;

        colidx = 0;
        ptr = begin = chunk;
        for (i = len; i > 0; i--, ptr++)
            if (*ptr == '\t') {
                column = PYOBJECT_FROM_STRING_AND_SIZE(begin,
                                                       (Py_ssize_t)(ptr - begin));
                if (column == NULL || PyList_Append(ret, column) == -1) {
                    Py_DECREF(ret);
                    return NULL;
                }

                Py_DECREF(column);
                begin = ptr + 1;
                colidx++;
            }

        column = PYOBJECT_FROM_STRING_AND_SIZE(begin, (Py_ssize_t)(ptr - begin));
        if (column == NULL || PyList_Append(ret, column) == -1) {
            Py_DECREF(ret);
            return NULL;
        }
        Py_DECREF(column);

        return ret;
    }
    else
        return NULL;
}
コード例 #3
0
/// Advance to the next header line of the tabix stream.
///
/// Nothing is read when the stream has already ended or when either the
/// tabix handle or the iterator is null. A line counts as a header line only
/// when ti_read() returned a non-null, non-empty string whose first
/// character is '#'; anything else ends the stream.
///
/// @return true when a header line is available in _linebuf, false otherwise.
bool
tabix_header_streamer::
next() {
    if (_is_stream_end) return false;
    if ((NULL == _tfp) || (NULL == _titer)) return false;

    int line_length;  // filled in by ti_read(); not consulted afterwards
    _linebuf = (char*) ti_read(_tfp, _titer, &line_length);

    const bool is_header_line =
        (NULL != _linebuf) && (strlen(_linebuf) >= 1) && (_linebuf[0] == '#');

    // The first non-header line (or the end of input) terminates the stream.
    if (! is_header_line) _is_stream_end = true;

    _is_record_set = (! _is_stream_end);

    return _is_record_set;
}
コード例 #4
0
ファイル: main.c プロジェクト: Brainiarc7/TS
/*
 * Command-line entry point of the tabix tool.
 *
 * Depending on the options and the number of positional arguments it:
 *   - prints the usage text (no positional argument),
 *   - lists the sequence names stored in the index (-l),
 *   - replaces the header of the data file (-r FILE),
 *   - builds the index (exactly one positional argument), or
 *   - retrieves lines: everything for ".", lines overlapping a BED file
 *     (-B), or lines in each listed region.
 *
 * Returns 0 on success and 1 on usage or runtime errors; when building the
 * index the return value of ti_index_build() is passed through, and with -r
 * the return value of reheader_file() is passed through.
 */
int main(int argc, char *argv[])
{
	int c, skip = -1, meta = -1, list_chrms = 0, force = 0, print_header = 0, bed_reg = 0;
	ti_conf_t conf = ti_conf_gff;
	const char *reheader = NULL;
	struct stat stat_tbi, stat_vcf;
	char *fnidx;

	while ((c = getopt(argc, argv, "p:s:b:e:0S:c:lhfBr:")) >= 0) {
		switch (c) {
		case 'B': bed_reg = 1; break;
		case '0': conf.preset |= TI_FLAG_UCSC; break;
		case 'S': skip = atoi(optarg); break;
		case 'c': meta = optarg[0]; break;
		case 'p':
			if (strcmp(optarg, "gff") == 0) conf = ti_conf_gff;
			else if (strcmp(optarg, "bed") == 0) conf = ti_conf_bed;
			else if (strcmp(optarg, "sam") == 0) conf = ti_conf_sam;
			else if (strcmp(optarg, "vcf") == 0 || strcmp(optarg, "vcf4") == 0) conf = ti_conf_vcf;
			else if (strcmp(optarg, "psltbl") == 0) conf = ti_conf_psltbl;
			else {
				fprintf(stderr, "[main] unrecognized preset '%s'\n", optarg);
				return 1;
			}
			break;
		case 's': conf.sc = atoi(optarg); break;
		case 'b': conf.bc = atoi(optarg); break;
		case 'e': conf.ec = atoi(optarg); break;
		case 'l': list_chrms = 1; break;
		case 'h': print_header = 1; break;
		case 'f': force = 1; break;
		case 'r': reheader = optarg; break;
		}
	}
	// -S and -c are applied after the loop so that they override whatever
	// preset -p selected, regardless of the order of the options.
	if (skip >= 0) conf.line_skip = skip;
	if (meta >= 0) conf.meta_char = meta;

	if (optind == argc) {
		fprintf(stderr, "\n");
		fprintf(stderr, "Program: tabix (TAB-delimited file InderXer)\n");
		fprintf(stderr, "Version: %s\n\n", PACKAGE_VERSION);
		fprintf(stderr, "Usage:   tabix <in.tab.bgz> [region1 [region2 [...]]]\n\n");
		fprintf(stderr, "Options: -p STR     preset: gff, bed, sam, vcf, psltbl [gff]\n");
		fprintf(stderr, "         -s INT     sequence name column [1]\n");
		fprintf(stderr, "         -b INT     start column [4]\n");
		fprintf(stderr, "         -e INT     end column; can be identical to '-b' [5]\n");
		fprintf(stderr, "         -S INT     skip first INT lines [0]\n");
		fprintf(stderr, "         -c CHAR    symbol for comment/meta lines [#]\n");
		fprintf(stderr, "         -r FILE    replace the header with the content of FILE [null]\n");
		fprintf(stderr, "         -B         region1 is a BED file (entire file will be read)\n");
		fprintf(stderr, "         -0         zero-based coordinate\n");
		fprintf(stderr, "         -h         print the header lines\n");
		fprintf(stderr, "         -l         list chromosome names\n");
		fprintf(stderr, "         -f         force to overwrite the index\n");
		fprintf(stderr, "\n");
		return 1;
	}

	if (list_chrms) { // list the sequence names stored in the index
		ti_index_t *idx;
		int i, n;
		const char **names;
		idx = ti_index_load(argv[optind]);
		if (idx == 0) {
			fprintf(stderr, "[main] fail to load the index file.\n");
			return 1;
		}
		names = ti_seqname(idx, &n);
		for (i = 0; i < n; ++i) printf("%s\n", names[i]);
		free(names);
		ti_index_destroy(idx);
		return 0;
	}

	if (reheader)
		return reheader_file(reheader, argv[optind], conf.meta_char);

	// Name of the index file: "<data file>.tbi" (4 extra chars + NUL).
	fnidx = (char*)calloc(strlen(argv[optind]) + 5, 1);
	if (fnidx == NULL) {
		fprintf(stderr, "[tabix] out of memory.\n");
		return 1;
	}
	strcat(strcpy(fnidx, argv[optind]), ".tbi");

	if (optind + 1 == argc) { // build the index
		if (force == 0 && stat(fnidx, &stat_tbi) == 0) {
			// Before complaining, check if the data file isn't newer. This is a
			// common source of errors, people tend not to notice that tabix failed.
			// If the data file cannot be examined it cannot be shown to be newer,
			// so the existing index is left alone.
			if (stat(argv[optind], &stat_vcf) != 0 || stat_vcf.st_mtime <= stat_tbi.st_mtime) {
				fprintf(stderr, "[tabix] the index file exists. Please use '-f' to overwrite.\n");
				free(fnidx);
				return 1;
			}
		}
		if (bgzf_check_bgzf(argv[optind]) != 1) {
			fprintf(stderr,"[tabix] was bgzip used to compress this file? %s\n", argv[optind]);
			free(fnidx);
			return 1;
		}
		free(fnidx);
		return ti_index_build(argv[optind], &conf);
	}

	{ // retrieve
		tabix_t *t;
		// Common source of errors: new data file is used with an old index.
		// The timestamps are only compared when both files could be examined;
		// otherwise the stat buffers would hold indeterminate values.
		int have_tbi = (stat(fnidx, &stat_tbi) == 0);
		int have_vcf = (stat(argv[optind], &stat_vcf) == 0);
		free(fnidx);
		if (force == 0 && have_tbi && have_vcf && stat_vcf.st_mtime > stat_tbi.st_mtime) {
			fprintf(stderr, "[tabix] the index file is older than the vcf file. Please use '-f' to overwrite or reindex.\n");
			return 1;
		}

		if ((t = ti_open(argv[optind], 0)) == 0) {
			fprintf(stderr, "[main] fail to open the data file.\n");
			return 1;
		}
		if (strcmp(argv[optind+1], ".") == 0) { // retrieve all
			ti_iter_t iter;
			const char *s;
			int len;
			iter = ti_query(t, 0, 0, 0);
			while ((s = ti_read(t, iter, &len)) != 0) {
				fputs(s, stdout); fputc('\n', stdout);
			}
			ti_iter_destroy(iter);
		} else { // retrieve from specified regions
			int i, len;
			ti_iter_t iter;
			const char *s;
			const ti_conf_t *idxconf;
			const ti_conf_t *use_conf;

			// With -B the whole file is scanned, so a missing index is tolerated.
			if (ti_lazy_index_load(t) < 0 && bed_reg == 0) {
				fprintf(stderr,"[tabix] failed to load the index file.\n");
				ti_close(t);
				return 1;
			}
			idxconf = ti_get_conf(t->idx);
			// Use the configuration stored in the index file if available,
			// otherwise fall back to the one assembled from the command line.
			use_conf = idxconf? idxconf : &conf;

			if (print_header) {
				// If requested, print the header lines here: the leading lines
				// whose first character is the meta character.
				iter = ti_query(t, 0, 0, 0);
				while ((s = ti_read(t, iter, &len)) != 0) {
					if ((int)(*s) != use_conf->meta_char) break;
					fputs(s, stdout); fputc('\n', stdout);
				}
				ti_iter_destroy(iter);
			}
			if (bed_reg) {
				extern int bed_overlap(const void *_h, const char *chr, int beg, int end);
				extern void *bed_read(const char *fn);
				extern void bed_destroy(void *_h);

				void *bed = bed_read(argv[optind+1]); // load the BED file
				ti_interval_t intv;

				if (bed == 0) {
					fprintf(stderr, "[main] fail to read the BED file.\n");
					ti_close(t);
					return 1;
				}
				iter = ti_query(t, 0, 0, 0);
				while ((s = ti_read(t, iter, &len)) != 0) {
					int saved, overlaps;
					ti_get_intv(use_conf, len, (char*)s, &intv);
					// Temporarily terminate the sequence name in place so it can
					// be passed as a C string, then restore the line before printing.
					saved = *intv.se; *intv.se = '\0';
					overlaps = bed_overlap(bed, intv.ss, intv.beg, intv.end);
					*intv.se = saved;
					if (overlaps) puts(s);
				}
				ti_iter_destroy(iter);
				bed_destroy(bed);
			} else {
				for (i = optind + 1; i < argc; ++i) {
					int tid, beg, end;
					// Regions that cannot be parsed are skipped without a message.
					if (ti_parse_region(t->idx, argv[i], &tid, &beg, &end) == 0) {
						iter = ti_queryi(t, tid, beg, end);
						while ((s = ti_read(t, iter, &len)) != 0) {
							fputs(s, stdout); fputc('\n', stdout);
						}
						ti_iter_destroy(iter);
					}
				}
			}
		}
		ti_close(t);
	}
	return 0;
}
コード例 #5
0
ファイル: bcfReader.c プロジェクト: hoffman/WiggleTools
/*
 * Fetch the next line of input for the reader.
 *
 * When no tabix iterator is set, the line is read with gzgets() from the
 * gzip stream into data->buffer (at most BUFF_LENGTH bytes). Otherwise the
 * line comes from ti_read() on the tabix file; the length output of
 * ti_read() is not requested.
 *
 * Returns whatever the underlying read call returns.
 */
static char * nextLine(BCFReaderData * data) {
	if (!data->tabix_iterator)
		return gzgets(data->gz_file, data->buffer, BUFF_LENGTH);

	return ti_read(data->tabix_file, data->tabix_iterator, 0);
}
コード例 #6
0
ファイル: linefile.c プロジェクト: bh0085/kent
boolean lineFileNext(struct lineFile *lf, char **retStart, int *retSize)
/* Fetch next line from file.
 *
 * On success returns TRUE, points *retStart at the start of the line inside
 * lf's buffer and, when retSize is not NULL, stores the line size there.
 * Returns FALSE when no further line can be read.
 *
 * Three sources are handled:
 *   - a line pushed back earlier (lf->reuse) is handed out again;
 *   - with USE_TABIX and an active tabix iterator, the line comes from
 *     ti_read() and is copied into lf->buf;
 *   - otherwise the buffer is scanned for the next line terminator and
 *     refilled (and grown if necessary) until one is found. */
{
char *buf = lf->buf;
int bytesInBuf = lf->bytesInBuf;
int endIx = lf->lineEnd;
boolean gotLf = FALSE;
int newStart;

if (lf->reuse)
    {
    /* Hand the previously returned line out once more. */
    lf->reuse = FALSE;
    if (retSize != NULL)
	*retSize = lf->lineEnd - lf->lineStart;
    *retStart = buf + lf->lineStart;
    if (lf->metaOutput && (*retStart)[0] == '#')
        metaDataAdd(lf, *retStart);
    return TRUE;
    }

#ifdef USE_TABIX
if (lf->tabix != NULL && lf->tabixIter != NULL)
    {
    // Just use line-oriented ti_read:
    int lineSize = 0;
    const char *line = ti_read(lf->tabix, lf->tabixIter, &lineSize);
    if (line == NULL)
	return FALSE;
    lf->bufOffsetInFile = -1;
    lf->bytesInBuf = lineSize;
    lf->lineIx = -1;
    lf->lineStart = 0;
    lf->lineEnd = lineSize;
    /* The copy below also has to store the terminating NUL, so a line of
     * exactly bufSize bytes does not fit either (assuming safecpy rejects
     * strings that do not fit including the terminator). */
    if (lineSize >= lf->bufSize)
	// shouldn't be!  but just in case:
	lineFileExpandBuf(lf, lineSize * 2);
    safecpy(lf->buf, lf->bufSize, line);
    *retStart = lf->buf;
    if (retSize != NULL)
	*retSize = lineSize;
    return TRUE;
    }
#endif // USE_TABIX

determineNlType(lf, buf+endIx, bytesInBuf);

/* Find next end of line in buffer. */
switch(lf->nlType)
    {
    case nlt_unix:
    case nlt_dos:
	for (endIx = lf->lineEnd; endIx < bytesInBuf; ++endIx)
	    {
	    if (buf[endIx] == '\n')
		{
		gotLf = TRUE;
		endIx += 1;
		break;
		}
	    }
	break;
    case nlt_mac:
	for (endIx = lf->lineEnd; endIx < bytesInBuf; ++endIx)
	    {
	    if (buf[endIx] == '\r')
		{
		gotLf = TRUE;
		endIx += 1;
		break;
		}
	    }
	break;
    case nlt_undet:
	break;
    }

/* If not in buffer read in a new buffer's worth. */
while (!gotLf)
    {
    int oldEnd = lf->lineEnd;
    int sizeLeft = bytesInBuf - oldEnd;
    int bufSize = lf->bufSize;
    int readSize = bufSize - sizeLeft;

    /* Slide the unconsumed tail to the front so the read can append to it. */
    if (oldEnd > 0 && sizeLeft > 0)
	{
	memmove(buf, buf+oldEnd, sizeLeft);
	}
    lf->bufOffsetInFile += oldEnd;
    if (lf->fd >= 0)
	readSize = lineFileLongNetRead(lf->fd, buf+sizeLeft, readSize);
#ifdef USE_TABIX
    else if (lf->tabix != NULL && readSize > 0)
	{
	readSize = ti_bgzf_read(lf->tabix->fp, buf+sizeLeft, readSize);
	if (readSize < 1)
	    return FALSE;
	}
#endif // USE_TABIX
    else
        readSize = 0;

    if ((readSize == 0) && (endIx > oldEnd))
	{
	/* Nothing more to read, but unterminated data is left over:
	 * return it as the final line. */
	endIx = sizeLeft;
	buf[endIx] = 0;
	lf->bytesInBuf = newStart = lf->lineStart = 0;
	lf->lineEnd = endIx;
	++lf->lineIx;
	if (retSize != NULL)
	    *retSize = endIx - newStart;
	*retStart = buf + newStart;
        if ((*retStart)[0] == '#')
            metaDataAdd(lf, *retStart);
	return TRUE;
	}
    else if (readSize <= 0)
	{
	lf->bytesInBuf = lf->lineStart = lf->lineEnd = 0;
	return FALSE;
	}
    bytesInBuf = lf->bytesInBuf = readSize + sizeLeft;
    lf->lineEnd = 0;

    determineNlType(lf, buf+endIx, bytesInBuf);

    /* Look for next end of line.  */
    switch(lf->nlType)
	{
    	case nlt_unix:
	case nlt_dos:
	    for (endIx = sizeLeft; endIx <bytesInBuf; ++endIx)
		{
		if (buf[endIx] == '\n')
		    {
		    endIx += 1;
		    gotLf = TRUE;
		    break;
		    }
		}
	    break;
	case nlt_mac:
	    for (endIx = sizeLeft; endIx <bytesInBuf; ++endIx)
		{
		if (buf[endIx] == '\r')
		    {
		    endIx += 1;
		    gotLf = TRUE;
		    break;
		    }
		}
	    break;
	case nlt_undet:
	    break;
	}
    /* Buffer is full and still holds no complete line: grow it, up to a cap. */
    if (!gotLf && bytesInBuf == lf->bufSize)
        {
	if (bufSize >= 512*1024*1024)
	    {
	    errAbort("Line too long (more than %d chars) line %d of %s",
		lf->bufSize, lf->lineIx+1, lf->fileName);
	    }
	else
	    {
	    lineFileExpandBuf(lf, bufSize*2);
	    buf = lf->buf;
	    }
	}
    }

if (lf->zTerm)
    {
    /* Replace the line terminator with NUL. */
    buf[endIx-1] = 0;
    /* lf->lineEnd still holds the start of this line here.  Only look at the
     * byte before the terminator when it belongs to this line; for a
     * one-byte line at offset 0 that byte would lie before the buffer. */
    if ((lf->nlType == nlt_dos) && (endIx - lf->lineEnd >= 2) && (buf[endIx-2]=='\r'))
	{
	buf[endIx-2] = 0;
	}
    }

lf->lineStart = newStart = lf->lineEnd;
lf->lineEnd = endIx;
++lf->lineIx;
if (retSize != NULL)
    *retSize = endIx - newStart;
*retStart = buf + newStart;
if ((*retStart)[0] == '#')
    metaDataAdd(lf, *retStart);
return TRUE;
}