Exemple #1
0
struct vcfRecord *vcfRecordFromRow(struct vcfFile *vcff, char **words)
/* Build a vcfRecord from the already-split columns of one VCF data line.
 * Reads words[0] through words[7]; when vcff->genotypeCount is positive it also
 * reads words[8] and the genotypeCount columns after it.  No bounds checking is
 * done here, so the caller must supply enough columns.
 * Returns the new record, obtained through vcfFileAlloc on vcff. */
{
struct vcfRecord *rec = vcfFileAlloc(vcff, sizeof(struct vcfRecord));
rec->file = vcff;

// Fixed columns, taken in the order they appear on the line.
rec->chrom = vcfFilePooledStr(vcff, words[0]);
// Column 1 must be numeric; it is stored minus one as chromStart.
rec->chromStart = lineFileNeedNum(vcff->lf, words, 1) - 1;
// Provisional end one past the start; parseRefAndAlt and parseInfoColumn
// may overwrite chromEnd.
rec->chromEnd = rec->chromStart + 1;
rec->name = vcfFilePooledStr(vcff, words[2]);
parseRefAndAlt(vcff, rec, words[3], words[4]);
rec->qual = vcfFilePooledStr(vcff, words[5]);
parseFilterColumn(vcff, rec, words[6]);

// Keep the whole INFO column as a string (addition credited to Bo Peng).
// This is done before parseInfoColumn is handed the same buffer.
rec->unparsedInfoElements = vcfFilePooledStr(vcff, words[7]);
parseInfoColumn(vcff, rec, words[7]);

// Without genotype columns the record is complete.
if (vcff->genotypeCount <= 0)
    return rec;

rec->format = vcfFilePooledStr(vcff, words[8]);
rec->genotypeUnparsedStrings = vcfFileAlloc(vcff, vcff->genotypeCount * sizeof(char *));
// Only stash the raw genotype strings; parsing them is deferred until needed.
int gtIx = 0;
while (gtIx < vcff->genotypeCount)
    {
    rec->genotypeUnparsedStrings[gtIx] = vcfFileCloneStr(vcff, words[9 + gtIx]);
    gtIx++;
    }
return rec;
}
Exemple #2
0
static void vcfParseData(struct vcfFile *vcff, int maxRecords)
/* Read data rows from vcff->lf (header already parsed, lineFile positioned at the
 * first data row) and store one vcfRecord per row on vcff->records.
 * A negative maxRecords means no limit; otherwise reading stops once that many
 * records have been stored.  Does nothing if vcff is NULL.
 * The lineFile is closed before returning. */
{
if (vcff == NULL)
    return;

char *words[VCF_MAX_COLUMNS];
// 8 fixed columns, or 9 (adding FORMAT) plus one column per genotype.
int expected = (vcff->genotypeCount > 0) ? 9 + vcff->genotypeCount : 8;
int recCount = 0;

for (;;)
    {
    int wordCount = lineFileChop(vcff->lf, words);
    if (wordCount <= 0)
        break;
    // The limit is tested after the line has been chopped, so the row that
    // trips it has been read but is not stored.
    if (maxRecords >= 0 && recCount >= maxRecords)
        break;
    lineFileExpectWords(vcff->lf, expected, wordCount);

    struct vcfRecord *rec;
    AllocVar(rec);
    rec->file = vcff;
    rec->chrom = vcfFilePooledStr(vcff, words[0]);
    // Column 1 must be numeric; it is stored minus one as chromStart.
    rec->chromStart = lineFileNeedNum(vcff->lf, words, 1) - 1;
    // Provisional end one past the start; parseRefAndAlt and parseInfoColumn
    // may overwrite chromEnd.
    rec->chromEnd = rec->chromStart + 1;
    rec->name = vcfFilePooledStr(vcff, words[2]);
    parseRefAndAlt(vcff, rec, words[3], words[4]);
    rec->qual = vcfFilePooledStr(vcff, words[5]);
    parseFilterColumn(vcff, rec, words[6]);
    parseInfoColumn(vcff, rec, words[7]);

    if (vcff->genotypeCount > 0)
        {
        rec->format = vcfFilePooledStr(vcff, words[8]);
        rec->genotypeUnparsedStrings = vcfFileAlloc(vcff, vcff->genotypeCount * sizeof(char *));
        // Only stash the raw genotype strings; parsing them is deferred until needed.
        for (int gtIx = 0;  gtIx < vcff->genotypeCount;  gtIx++)
            rec->genotypeUnparsedStrings[gtIx] = vcfFileCloneStr(vcff, words[9 + gtIx]);
        }

    // Push onto the head now; the list is reversed once after the loop.
    slAddHead(&(vcff->records), rec);
    recCount += 1;
    }

slReverse(&(vcff->records));
lineFileClose(&(vcff->lf));
}