static void vcfInfoDetails(struct vcfRecord *rec) /* Expand info keys to descriptions, then print out keys and values. */ { if (rec->infoCount == 0) return; struct vcfFile *vcff = rec->file; puts("<B>INFO column annotations:</B><BR>"); puts("<TABLE border=0 cellspacing=0 cellpadding=2>"); int i; for (i = 0; i < rec->infoCount; i++) { struct vcfInfoElement *el = &(rec->infoElements[i]); const struct vcfInfoDef *def = vcfInfoDefForKey(vcff, el->key); printf("<TR valign='top'><TD align=\"right\"><B>%s:</B></TD><TD>", el->key); int j; enum vcfInfoType type = def ? def->type : vcfInfoString; if (type == vcfInfoFlag && el->count == 0) printf("Yes"); // no values, so we can't call vcfPrintDatum... // However, if this is older VCF, type vcfInfoFlag might have a value. if (looksTabular(def, el)) { // Make a special display below printf("<em>see below</em>"); } else { for (j = 0; j < el->count; j++) { if (j > 0) printf(", "); if (el->missingData[j]) printf("."); else vcfPrintDatum(stdout, el->values[j], type); } } if (def != NULL) printf(" </TD><TD>%s", def->description); else printf("</TD><TD>"); printf("</TD></TR>\n"); } puts("</TABLE>"); // Now show the tabular fields, if any for (i = 0; i < rec->infoCount; i++) { struct vcfInfoElement *el = &(rec->infoElements[i]); const struct vcfInfoDef *def = vcfInfoDefForKey(vcff, el->key); if (looksTabular(def, el)) { puts("<BR>"); printf("<B>%s</B>: %s<BR>\n", el->key, def->description); puts("<TABLE class='stdTbl'>"); printTabularHeaderRow(def); printTabularData(el); puts("</TABLE>"); } } }
static enum vcfInfoType typeForInfoKey(struct vcfFile *vcff, const char *key) /* Look up the type of INFO component key, in the definitions from the header, * and failing that, from the keys reserved in the spec. */ { struct vcfInfoDef *def = vcfInfoDefForKey(vcff, key); if (def == NULL) { vcfFileErr(vcff, "There is no INFO header defining \"%s\"", key); // default to string so we can display value as-is: return vcfInfoString; } return def->type; }
void vcfInfoElsToString(struct dyString *dy, struct vcfFile *vcff, struct vcfRecord *rec) /* Unpack rec's typed infoElements to semicolon-sep'd string in dy.*/ { dyStringClear(dy); if (rec->infoCount == 0) dyStringAppendC(dy, '.'); int i; for (i = 0; i < rec->infoCount; i++) { if (i > 0) dyStringAppendC(dy, ';'); const struct vcfInfoElement *el = &(rec->infoElements[i]); const struct vcfInfoDef *def = vcfInfoDefForKey(vcff, el->key); enum vcfInfoType type = def? def->type : vcfInfoNoType; dyStringAppend(dy, el->key); if (el->count > 0) dyStringAppendC(dy, '='); int j; for (j = 0; j < el->count; j++) { if (j > 0) dyStringAppendC(dy, ','); if (el->missingData[j]) { dyStringAppend(dy, "."); continue; } union vcfDatum dat = el->values[j]; switch (type) { case vcfInfoInteger: dyStringPrintf(dy, "%d", dat.datInt); break; case vcfInfoFloat: { // use big precision and erase trailing zeros: char fbuf[64]; safef(fbuf, sizeof(fbuf), "%.16lf", dat.datFloat); int i; for (i = strlen(fbuf) - 1; i > 0; i--) if (fbuf[i] == '0') fbuf[i] = '\0'; else break; dyStringAppend(dy, fbuf); } break; case vcfInfoCharacter: dyStringAppendC(dy, dat.datChar); break; case vcfInfoFlag: // Flags could have values in older VCF case vcfInfoNoType: case vcfInfoString: dyStringAppend(dy, dat.datString); break; default: errAbort("Invalid vcfInfoType %d (how did this get past parser?", type); } } } }
static boolean minFreqFail(struct vcfRecord *record, double minFreq) /* Return TRUE if record's INFO include AF (alternate allele frequencies) or AC+AN * (alternate allele counts and total count of observed alleles) and the minor allele * frequency < minFreq -- or rather, major allele frequency > (1 - minFreq) because * variants with > 2 alleles might have some significant minor frequencies along with * tiny minor frequencies). */ { struct vcfFile *vcff = record->file; boolean gotInfo = FALSE; double refFreq = 1.0; double maxAltFreq = 0.0; int i; const struct vcfInfoElement *afEl = vcfRecordFindInfo(record, "AF"); const struct vcfInfoDef *afDef = vcfInfoDefForKey(vcff, "AF"); if (afEl != NULL && afDef != NULL && afDef->type == vcfInfoFloat) { // If INFO includes alt allele freqs, use them directly. gotInfo = TRUE; for (i = 0; i < afEl->count; i++) { if (afEl->missingData[i]) continue; double altFreq = afEl->values[i].datFloat; refFreq -= altFreq; if (altFreq > maxAltFreq) maxAltFreq = altFreq; } } else { // Calculate alternate allele freqs from AC and AN: const struct vcfInfoElement *acEl = vcfRecordFindInfo(record, "AC"); const struct vcfInfoDef *acDef = vcfInfoDefForKey(vcff, "AC"); const struct vcfInfoElement *anEl = vcfRecordFindInfo(record, "AN"); const struct vcfInfoDef *anDef = vcfInfoDefForKey(vcff, "AN"); if (acEl != NULL && acDef != NULL && acDef->type == vcfInfoInteger && anEl != NULL && anDef != NULL && anDef->type == vcfInfoInteger && anEl->count == 1 && anEl->missingData[0] == FALSE) { gotInfo = TRUE; int totalCount = anEl->values[0].datInt; for (i = 0; i < acEl->count; i++) { if (acEl->missingData[i]) continue; int altCount = acEl->values[i].datInt; double altFreq = (double)altCount / totalCount; refFreq -= altFreq; if (altFreq < maxAltFreq) maxAltFreq = altFreq; } } else // Use MAF for alternate allele freqs from MAF: { const struct vcfInfoElement *mafEl = vcfRecordFindInfo(record, "MAF"); const struct vcfInfoDef *mafDef = vcfInfoDefForKey(vcff, "MAF"); if (mafEl != NULL && mafDef != NULL && mafDef->type == vcfInfoString && startsWith("Minor Allele Frequency",mafDef->description)) { // If INFO includes alt allele freqs, use them directly. gotInfo = TRUE; if (mafEl->count >= 1 && !mafEl->missingData[mafEl->count-1]) { char data[64]; safecpy(data,sizeof(data),mafEl->values[mafEl->count-1].datString); maxAltFreq = atof(lastWordInLine(data)); refFreq -= maxAltFreq; } } } } if (gotInfo) { double majorAlFreq = max(refFreq, maxAltFreq); if (majorAlFreq > (1.0 - minFreq)) return TRUE; } return FALSE; }