Example #1
0
int vcfTabixBatchRead(struct vcfFile *vcff, char *chrom, int start, int end,
                      int maxErr, int maxRecords)
// Reads a batch of records from an opened and indexed VCF file, adding them to
// vcff->records and returning the count of new records added in this batch.
// Note: vcff->records will continue to be sorted, even if batches are loaded
// out of order.  Additionally, resulting vcff->records will contain no duplicates
// so returned count refects only the new records added, as opposed to all records
// in range.  If maxErr >= zero, then continue to parse until there are maxErr+1
// errors.  A maxErr less than zero does not stop and reports all errors.  Set
// maxErr to VCF_IGNORE_ERRS for silence.
{
int oldCount = slCount(vcff->records);

if (lineFileSetTabixRegion(vcff->lf, chrom, start, end))
    {
    struct vcfRecord *records = vcfParseData(vcff, maxRecords);
    if (records)
        {
        struct vcfRecord *lastRec = vcff->records;
        if (lastRec == NULL)
            vcff->records = records;
        else
            {
            // Considered just asserting the batches were in order, but a problem may
            // result when non-overlapping location windows pick up the same long variant.
            slSortMergeUniq(&(vcff->records), records, vcfRecordCmp, NULL);
            }
        }
    }

return slCount(vcff->records) - oldCount;
}
Example #2
0
File: vcf.c Project: bh0085/kent
struct vcfFile *vcfFileMayOpen(char *fileOrUrl, int maxErr, int maxRecords)
/* Parse a VCF file into a vcfFile object.  If maxErr not zero, then
 * continue to parse until this number of error have been reached.  A maxErr
 * less than zero does not stop and reports all errors. */
{
struct lineFile *lf = NULL;
if (startsWith("http://", fileOrUrl) || startsWith("ftp://", fileOrUrl) ||
    startsWith("https://", fileOrUrl))
    lf = netLineFileOpen(fileOrUrl);
else
    lf = lineFileMayOpen(fileOrUrl, TRUE);
struct vcfFile *vcff = vcfFileHeaderFromLineFile(lf, maxErr);
vcfParseData(vcff, maxRecords);
return vcff;
}
Example #3
0
File: vcf.c Project: bh0085/kent
struct vcfFile *vcfTabixFileMayOpen(char *fileOrUrl, char *chrom, int start, int end,
				    int maxErr, int maxRecords)
/* Parse header and rows within the given position range from a VCF file that has been
 * compressed and indexed by tabix into a vcfFile object; return NULL if or if file has
 * no items in range.
 * If maxErr not zero, then continue to parse until this number of error have been reached.
 * A maxErr less than zero does not stop and reports all errors. */
{
struct lineFile *lf = lineFileTabixMayOpen(fileOrUrl, TRUE);
struct vcfFile *vcff = vcfFileHeaderFromLineFile(lf, maxErr);
if (vcff == NULL)
    return NULL;
if (isNotEmpty(chrom) && start != end)
    {
    if (lineFileSetTabixRegion(lf, chrom, start, end))
	vcfParseData(vcff, maxRecords);
    }
return vcff;
}
Example #4
0
struct vcfFile *vcfFileMayOpen(char *fileOrUrl, int maxErr, int maxRecords, boolean parseAll)
/* Open fileOrUrl and parse VCF header; return NULL if unable.
 * If parseAll, then read in all lines, parse and store in
 * vcff->records; if maxErr >= zero, then continue to parse until
 * there are maxErr+1 errors.  A maxErr less than zero does not stop
 * and reports all errors. Set maxErr to VCF_IGNORE_ERRS for silence */
{
struct lineFile *lf = NULL;
if (startsWith("http://", fileOrUrl) || startsWith("ftp://", fileOrUrl) ||
    startsWith("https://", fileOrUrl))
    lf = netLineFileOpen(fileOrUrl);
else
    lf = lineFileMayOpen(fileOrUrl, TRUE);
struct vcfFile *vcff = vcfFileHeaderFromLineFile(lf, maxErr);
if (parseAll)
    {
    vcff->records = vcfParseData(vcff, maxRecords);
    lineFileClose(&(vcff->lf)); // Not sure why it is closed.  Angie?
    }
return vcff;
}
Example #5
0
struct vcfFile *vcfTabixFileMayOpen(char *fileOrUrl, char *chrom, int start, int end,
				    int maxErr, int maxRecords)
/* Open a VCF file that has been compressed and indexed by tabix and
 * parse VCF header, or return NULL if unable.  If chrom is non-NULL,
 * seek to the position range and parse all lines in range into
 * vcff->records.  If maxErr >= zero, then continue to parse until
 * there are maxErr+1 errors.  A maxErr less than zero does not stop
 * and reports all errors. Set maxErr to VCF_IGNORE_ERRS for silence */
{
struct lineFile *lf = lineFileTabixMayOpen(fileOrUrl, TRUE);
struct vcfFile *vcff = vcfFileHeaderFromLineFile(lf, maxErr);
if (vcff == NULL)
    return NULL;
if (isNotEmpty(chrom) && start != end)
    {
    if (lineFileSetTabixRegion(lf, chrom, start, end))
        {
        vcff->records = vcfParseData(vcff, maxRecords);
        lineFileClose(&(vcff->lf)); // Not sure why it is closed.  Angie?
        }
    }
return vcff;
}