int vcfTabixBatchRead(struct vcfFile *vcff, char *chrom, int start, int end, int maxErr, int maxRecords) // Reads a batch of records from an opened and indexed VCF file, adding them to // vcff->records and returning the count of new records added in this batch. // Note: vcff->records will continue to be sorted, even if batches are loaded // out of order. Additionally, resulting vcff->records will contain no duplicates // so returned count refects only the new records added, as opposed to all records // in range. If maxErr >= zero, then continue to parse until there are maxErr+1 // errors. A maxErr less than zero does not stop and reports all errors. Set // maxErr to VCF_IGNORE_ERRS for silence. { int oldCount = slCount(vcff->records); if (lineFileSetTabixRegion(vcff->lf, chrom, start, end)) { struct vcfRecord *records = vcfParseData(vcff, maxRecords); if (records) { struct vcfRecord *lastRec = vcff->records; if (lastRec == NULL) vcff->records = records; else { // Considered just asserting the batches were in order, but a problem may // result when non-overlapping location windows pick up the same long variant. slSortMergeUniq(&(vcff->records), records, vcfRecordCmp, NULL); } } } return slCount(vcff->records) - oldCount; }
struct vcfFile *vcfFileMayOpen(char *fileOrUrl, int maxErr, int maxRecords)
/* Open fileOrUrl, parse its VCF header into a new vcfFile object and then parse the
 * data lines (at most maxRecords are requested from vcfParseData()).
 * fileOrUrl is opened with netLineFileOpen() when it starts with http://, https:// or
 * ftp://, otherwise with lineFileMayOpen().
 * Returns NULL when header parsing does not produce a vcfFile (e.g. the file could not
 * be opened); in that case no data parsing is attempted.
 * If maxErr is not zero, parsing continues until that number of errors has been
 * reached.  A maxErr less than zero does not stop and reports all errors. */
{
struct lineFile *lf = NULL;

if (startsWith("http://", fileOrUrl) || startsWith("ftp://", fileOrUrl) ||
    startsWith("https://", fileOrUrl))
    lf = netLineFileOpen(fileOrUrl);
else
    lf = lineFileMayOpen(fileOrUrl, TRUE);

struct vcfFile *vcff = vcfFileHeaderFromLineFile(lf, maxErr);
/* A "MayOpen" can fail: do not hand a NULL vcfFile to the record parser. */
if (vcff == NULL)
    return NULL;

vcfParseData(vcff, maxRecords);
return vcff;
}
struct vcfFile *vcfTabixFileMayOpen(char *fileOrUrl, char *chrom, int start, int end,
                                    int maxErr, int maxRecords)
/* Open a VCF file that has been compressed and indexed by tabix and parse its header
 * into a vcfFile object; returns NULL if header parsing yields no vcfFile.
 * When chrom is not empty and start != end, the file is positioned on chrom:start-end
 * and, if that succeeds, the rows in range are parsed (at most maxRecords are requested
 * from vcfParseData()).  Otherwise only the header is parsed.
 * If maxErr is not zero, parsing continues until that number of errors has been
 * reached.  A maxErr less than zero does not stop and reports all errors. */
{
struct lineFile *tabixLf = lineFileTabixMayOpen(fileOrUrl, TRUE);
struct vcfFile *result = vcfFileHeaderFromLineFile(tabixLf, maxErr);

/* Short-circuit order matters: the region is only set for a usable vcfFile and a
 * non-empty, non-degenerate range request. */
if (result != NULL && isNotEmpty(chrom) && start != end &&
    lineFileSetTabixRegion(tabixLf, chrom, start, end))
    {
    vcfParseData(result, maxRecords);
    }

return result;
}
struct vcfFile *vcfFileMayOpen(char *fileOrUrl, int maxErr, int maxRecords, boolean parseAll)
/* Open fileOrUrl and parse the VCF header; return NULL if unable.
 * fileOrUrl is opened with netLineFileOpen() when it starts with http://, https:// or
 * ftp://, otherwise with lineFileMayOpen().
 * If parseAll, then read in all lines, parse them and store the result in
 * vcff->records (at most maxRecords are requested from vcfParseData()), and close
 * the underlying line file afterwards.
 * If maxErr >= zero, then continue to parse until there are maxErr+1 errors.
 * A maxErr less than zero does not stop and reports all errors.
 * Set maxErr to VCF_IGNORE_ERRS for silence. */
{
struct lineFile *lf = NULL;

if (startsWith("http://", fileOrUrl) || startsWith("ftp://", fileOrUrl) ||
    startsWith("https://", fileOrUrl))
    lf = netLineFileOpen(fileOrUrl);
else
    lf = lineFileMayOpen(fileOrUrl, TRUE);

struct vcfFile *vcff = vcfFileHeaderFromLineFile(lf, maxErr);

/* vcff is NULL when the open/header parse failed; honor the "return NULL if unable"
 * contract instead of dereferencing it. */
if (vcff != NULL && parseAll)
    {
    vcff->records = vcfParseData(vcff, maxRecords);
    /* NOTE(review): the original author was unsure why the file is closed here;
     * behavior kept as is. */
    lineFileClose(&(vcff->lf));
    }

return vcff;
}
struct vcfFile *vcfTabixFileMayOpen(char *fileOrUrl, char *chrom, int start, int end, int maxErr, int maxRecords) /* Open a VCF file that has been compressed and indexed by tabix and * parse VCF header, or return NULL if unable. If chrom is non-NULL, * seek to the position range and parse all lines in range into * vcff->records. If maxErr >= zero, then continue to parse until * there are maxErr+1 errors. A maxErr less than zero does not stop * and reports all errors. Set maxErr to VCF_IGNORE_ERRS for silence */ { struct lineFile *lf = lineFileTabixMayOpen(fileOrUrl, TRUE); struct vcfFile *vcff = vcfFileHeaderFromLineFile(lf, maxErr); if (vcff == NULL) return NULL; if (isNotEmpty(chrom) && start != end) { if (lineFileSetTabixRegion(lf, chrom, start, end)) { vcff->records = vcfParseData(vcff, maxRecords); lineFileClose(&(vcff->lf)); // Not sure why it is closed. Angie? } } return vcff; }