/// Advance the streamer to the next line produced by ti_read().
///
/// Returns true when a new line is available in _linebuf, false when the
/// stream was already exhausted, is not open, or has just reached its end.
bool
tabix_streamer::
next() {
    // Nothing to read: already finished, or the file/iterator was never set up.
    if (_is_stream_end) return false;
    if (NULL == _tfp) return false;
    if (NULL == _titer) return false;

    int len;
    _linebuf = (char*) ti_read(_tfp, _titer, &len);

    if (NULL == _linebuf) {
        // ti_read() returned no line: mark the stream as finished.
        _is_stream_end = true;
        _is_record_set = false;
    } else {
        _is_stream_end = false;
        _is_record_set = true;
        _record_no++;
    }
    return _is_record_set;
}
/*
 * Build a string object from the bytes [begin, end) and append it to `list`.
 *
 * PyList_Append() takes its own reference to the item, so the local reference
 * is released on every path -- including when the append itself fails.
 *
 * Returns 0 on success and -1 on failure.
 */
static int
tabixiter_append_column(PyObject *list, const char *begin, const char *end)
{
    PyObject *column;
    int status;

    column = PYOBJECT_FROM_STRING_AND_SIZE(begin, (Py_ssize_t)(end - begin));
    if (column == NULL)
        return -1;

    status = PyList_Append(list, column);
    Py_DECREF(column);
    return status;
}

/*
 * Iterator "next" slot: read one line through ti_read() and return it as a
 * list of its TAB-separated columns.
 *
 * Returns a new list on success. Returns NULL when ti_read() yields no more
 * lines or when building the list fails.
 */
static PyObject *
tabixiter_iternext(TabixIteratorObject *self)
{
    const char *chunk, *ptr, *begin;
    PyObject *ret;
    int len, i;

    chunk = ti_read(self->tbobj->tb, self->iter, &len);
    if (chunk == NULL)
        return NULL;

    ret = PyList_New(0);
    if (ret == NULL)
        return NULL;

    /* Emit one column for every TAB found in the first `len` bytes. */
    ptr = begin = chunk;
    for (i = len; i > 0; i--, ptr++) {
        if (*ptr != '\t')
            continue;
        if (tabixiter_append_column(ret, begin, ptr) == -1) {
            Py_DECREF(ret);
            return NULL;
        }
        begin = ptr + 1;
    }

    /* The trailing column runs from the last TAB (or line start) to the end. */
    if (tabixiter_append_column(ret, begin, ptr) == -1) {
        Py_DECREF(ret);
        return NULL;
    }

    return ret;
}
/// Advance to the next header line produced by ti_read().
///
/// The stream is considered finished as soon as ti_read() returns no line or
/// returns a line that does not begin with '#'. Returns true only while a
/// header line is available in _linebuf.
bool
tabix_header_streamer::
next() {
    if (_is_stream_end) return false;
    if (NULL == _tfp) return false;
    if (NULL == _titer) return false;

    int len;
    _linebuf = (char*) ti_read(_tfp, _titer, &len);

    // An empty line has '\0' as its first byte, so the single first-character
    // comparison also rejects zero-length lines.
    const bool is_header_line = (NULL != _linebuf) && (_linebuf[0] == '#');
    if (! is_header_line) {
        _is_stream_end = true;
    }

    _is_record_set = (! _is_stream_end);
    return _is_record_set;
}
int main(int argc, char *argv[]) { int c, skip = -1, meta = -1, list_chrms = 0, force = 0, print_header = 0, bed_reg = 0; ti_conf_t conf = ti_conf_gff; const char *reheader = NULL; while ((c = getopt(argc, argv, "p:s:b:e:0S:c:lhfBr:")) >= 0) { switch (c) { case 'B': bed_reg = 1; break; case '0': conf.preset |= TI_FLAG_UCSC; break; case 'S': skip = atoi(optarg); break; case 'c': meta = optarg[0]; break; case 'p': if (strcmp(optarg, "gff") == 0) conf = ti_conf_gff; else if (strcmp(optarg, "bed") == 0) conf = ti_conf_bed; else if (strcmp(optarg, "sam") == 0) conf = ti_conf_sam; else if (strcmp(optarg, "vcf") == 0 || strcmp(optarg, "vcf4") == 0) conf = ti_conf_vcf; else if (strcmp(optarg, "psltbl") == 0) conf = ti_conf_psltbl; else { fprintf(stderr, "[main] unrecognized preset '%s'\n", optarg); return 1; } break; case 's': conf.sc = atoi(optarg); break; case 'b': conf.bc = atoi(optarg); break; case 'e': conf.ec = atoi(optarg); break; case 'l': list_chrms = 1; break; case 'h': print_header = 1; break; case 'f': force = 1; break; case 'r': reheader = optarg; break; } } if (skip >= 0) conf.line_skip = skip; if (meta >= 0) conf.meta_char = meta; if (optind == argc) { fprintf(stderr, "\n"); fprintf(stderr, "Program: tabix (TAB-delimited file InderXer)\n"); fprintf(stderr, "Version: %s\n\n", PACKAGE_VERSION); fprintf(stderr, "Usage: tabix <in.tab.bgz> [region1 [region2 [...]]]\n\n"); fprintf(stderr, "Options: -p STR preset: gff, bed, sam, vcf, psltbl [gff]\n"); fprintf(stderr, " -s INT sequence name column [1]\n"); fprintf(stderr, " -b INT start column [4]\n"); fprintf(stderr, " -e INT end column; can be identical to '-b' [5]\n"); fprintf(stderr, " -S INT skip first INT lines [0]\n"); fprintf(stderr, " -c CHAR symbol for comment/meta lines [#]\n"); fprintf(stderr, " -r FILE replace the header with the content of FILE [null]\n"); fprintf(stderr, " -B region1 is a BED file (entire file will be read)\n"); fprintf(stderr, " -0 zero-based coordinate\n"); fprintf(stderr, " -h 
print the header lines\n"); fprintf(stderr, " -l list chromosome names\n"); fprintf(stderr, " -f force to overwrite the index\n"); fprintf(stderr, "\n"); return 1; } if (list_chrms) { ti_index_t *idx; int i, n; const char **names; idx = ti_index_load(argv[optind]); if (idx == 0) { fprintf(stderr, "[main] fail to load the index file.\n"); return 1; } names = ti_seqname(idx, &n); for (i = 0; i < n; ++i) printf("%s\n", names[i]); free(names); ti_index_destroy(idx); return 0; } if (reheader) return reheader_file(reheader,argv[optind],conf.meta_char); struct stat stat_tbi,stat_vcf; char *fnidx = calloc(strlen(argv[optind]) + 5, 1); strcat(strcpy(fnidx, argv[optind]), ".tbi"); if (optind + 1 == argc) { if (force == 0) { if (stat(fnidx, &stat_tbi) == 0) { // Before complaining, check if the VCF file isn't newer. This is a common source of errors, // people tend not to notice that tabix failed stat(argv[optind], &stat_vcf); if ( stat_vcf.st_mtime <= stat_tbi.st_mtime ) { fprintf(stderr, "[tabix] the index file exists. Please use '-f' to overwrite.\n"); free(fnidx); return 1; } } } if ( bgzf_check_bgzf(argv[optind])!=1 ) { fprintf(stderr,"[tabix] was bgzip used to compress this file? %s\n", argv[optind]); free(fnidx); return 1; } return ti_index_build(argv[optind], &conf); } { // retrieve tabix_t *t; // Common source of errors: new VCF is used with an old index stat(fnidx, &stat_tbi); stat(argv[optind], &stat_vcf); if ( force==0 && stat_vcf.st_mtime > stat_tbi.st_mtime ) { fprintf(stderr, "[tabix] the index file is older than the vcf file. 
Please use '-f' to overwrite or reindex.\n"); free(fnidx); return 1; } free(fnidx); if ((t = ti_open(argv[optind], 0)) == 0) { fprintf(stderr, "[main] fail to open the data file.\n"); return 1; } if (strcmp(argv[optind+1], ".") == 0) { // retrieve all ti_iter_t iter; const char *s; int len; iter = ti_query(t, 0, 0, 0); while ((s = ti_read(t, iter, &len)) != 0) { fputs(s, stdout); fputc('\n', stdout); } ti_iter_destroy(iter); } else { // retrieve from specified regions int i, len; ti_iter_t iter; const char *s; const ti_conf_t *idxconf; if (ti_lazy_index_load(t) < 0 && bed_reg == 0) { fprintf(stderr,"[tabix] failed to load the index file.\n"); return 1; } idxconf = ti_get_conf(t->idx); if ( print_header ) { // If requested, print the header lines here iter = ti_query(t, 0, 0, 0); while ((s = ti_read(t, iter, &len)) != 0) { if ((int)(*s) != idxconf->meta_char) break; fputs(s, stdout); fputc('\n', stdout); } ti_iter_destroy(iter); } if (bed_reg) { extern int bed_overlap(const void *_h, const char *chr, int beg, int end); extern void *bed_read(const char *fn); extern void bed_destroy(void *_h); const ti_conf_t *conf_ = idxconf? 
idxconf : &conf; // use the index file if available void *bed = bed_read(argv[optind+1]); // load the BED file ti_interval_t intv; if (bed == 0) { fprintf(stderr, "[main] fail to read the BED file.\n"); return 1; } iter = ti_query(t, 0, 0, 0); while ((s = ti_read(t, iter, &len)) != 0) { int c; ti_get_intv(conf_, len, (char*)s, &intv); c = *intv.se; *intv.se = '\0'; if (bed_overlap(bed, intv.ss, intv.beg, intv.end)) { *intv.se = c; puts(s); } *intv.se = c; } ti_iter_destroy(iter); bed_destroy(bed); } else { for (i = optind + 1; i < argc; ++i) { int tid, beg, end; if (ti_parse_region(t->idx, argv[i], &tid, &beg, &end) == 0) { iter = ti_queryi(t, tid, beg, end); while ((s = ti_read(t, iter, &len)) != 0) { fputs(s, stdout); fputc('\n', stdout); } ti_iter_destroy(iter); } // else fprintf(stderr, "[main] invalid region: unknown target name or minus interval.\n"); } } } ti_close(t); } return 0; }
/*
 * Fetch the next line for a BCF reader.
 *
 * When data->tabix_iterator is set the line comes from ti_read() on the tabix
 * file; otherwise it is read with gzgets() into data->buffer (at most
 * BUFF_LENGTH bytes). Returns whatever the chosen reader returns.
 */
static char *
nextLine(BCFReaderData * data)
{
    if (!data->tabix_iterator) {
        /* No tabix iterator: plain sequential read from the gzip stream. */
        return gzgets(data->gz_file, data->buffer, BUFF_LENGTH);
    }

    /* The line length is not needed here, so no length pointer is passed. */
    return ti_read(data->tabix_file, data->tabix_iterator, 0);
}
boolean lineFileNext(struct lineFile *lf, char **retStart, int *retSize) /* Fetch next line from file. */ { char *buf = lf->buf; int bytesInBuf = lf->bytesInBuf; int endIx = lf->lineEnd; boolean gotLf = FALSE; int newStart; if (lf->reuse) { lf->reuse = FALSE; if (retSize != NULL) *retSize = lf->lineEnd - lf->lineStart; *retStart = buf + lf->lineStart; if (lf->metaOutput && *retStart[0] == '#') metaDataAdd(lf, *retStart); return TRUE; } #ifdef USE_TABIX if (lf->tabix != NULL && lf->tabixIter != NULL) { // Just use line-oriented ti_read: int lineSize = 0; const char *line = ti_read(lf->tabix, lf->tabixIter, &lineSize); if (line == NULL) return FALSE; lf->bufOffsetInFile = -1; lf->bytesInBuf = lineSize; lf->lineIx = -1; lf->lineStart = 0; lf->lineEnd = lineSize; if (lineSize > lf->bufSize) // shouldn't be! but just in case: lineFileExpandBuf(lf, lineSize * 2); safecpy(lf->buf, lf->bufSize, line); *retStart = lf->buf; if (retSize != NULL) *retSize = lineSize; return TRUE; } #endif // USE_TABIX determineNlType(lf, buf+endIx, bytesInBuf); /* Find next end of line in buffer. */ switch(lf->nlType) { case nlt_unix: case nlt_dos: for (endIx = lf->lineEnd; endIx < bytesInBuf; ++endIx) { if (buf[endIx] == '\n') { gotLf = TRUE; endIx += 1; break; } } break; case nlt_mac: for (endIx = lf->lineEnd; endIx < bytesInBuf; ++endIx) { if (buf[endIx] == '\r') { gotLf = TRUE; endIx += 1; break; } } break; case nlt_undet: break; } /* If not in buffer read in a new buffer's worth. 
*/ while (!gotLf) { int oldEnd = lf->lineEnd; int sizeLeft = bytesInBuf - oldEnd; int bufSize = lf->bufSize; int readSize = bufSize - sizeLeft; if (oldEnd > 0 && sizeLeft > 0) { memmove(buf, buf+oldEnd, sizeLeft); } lf->bufOffsetInFile += oldEnd; if (lf->fd >= 0) readSize = lineFileLongNetRead(lf->fd, buf+sizeLeft, readSize); #ifdef USE_TABIX else if (lf->tabix != NULL && readSize > 0) { readSize = ti_bgzf_read(lf->tabix->fp, buf+sizeLeft, readSize); if (readSize < 1) return FALSE; } #endif // USE_TABIX else readSize = 0; if ((readSize == 0) && (endIx > oldEnd)) { endIx = sizeLeft; buf[endIx] = 0; lf->bytesInBuf = newStart = lf->lineStart = 0; lf->lineEnd = endIx; ++lf->lineIx; if (retSize != NULL) *retSize = endIx - newStart; *retStart = buf + newStart; if (*retStart[0] == '#') metaDataAdd(lf, *retStart); return TRUE; } else if (readSize <= 0) { lf->bytesInBuf = lf->lineStart = lf->lineEnd = 0; return FALSE; } bytesInBuf = lf->bytesInBuf = readSize + sizeLeft; lf->lineEnd = 0; determineNlType(lf, buf+endIx, bytesInBuf); /* Look for next end of line. */ switch(lf->nlType) { case nlt_unix: case nlt_dos: for (endIx = sizeLeft; endIx <bytesInBuf; ++endIx) { if (buf[endIx] == '\n') { endIx += 1; gotLf = TRUE; break; } } break; case nlt_mac: for (endIx = sizeLeft; endIx <bytesInBuf; ++endIx) { if (buf[endIx] == '\r') { endIx += 1; gotLf = TRUE; break; } } break; case nlt_undet: break; } if (!gotLf && bytesInBuf == lf->bufSize) { if (bufSize >= 512*1024*1024) { errAbort("Line too long (more than %d chars) line %d of %s", lf->bufSize, lf->lineIx+1, lf->fileName); } else { lineFileExpandBuf(lf, bufSize*2); buf = lf->buf; } } } if (lf->zTerm) { buf[endIx-1] = 0; if ((lf->nlType == nlt_dos) && (buf[endIx-2]=='\r')) { buf[endIx-2] = 0; } } lf->lineStart = newStart = lf->lineEnd; lf->lineEnd = endIx; ++lf->lineIx; if (retSize != NULL) *retSize = endIx - newStart; *retStart = buf + newStart; if (*retStart[0] == '#') metaDataAdd(lf, *retStart); return TRUE; }