Ejemplo n.º 1
0
static void vcfInfoDetails(struct vcfRecord *rec)
/* Expand info keys to descriptions, then print out keys and values. */
{
if (rec->infoCount == 0)
    return;
struct vcfFile *vcff = rec->file;
puts("<B>INFO column annotations:</B><BR>");
puts("<TABLE border=0 cellspacing=0 cellpadding=2>");
int i;
for (i = 0;  i < rec->infoCount;  i++)
    {
    struct vcfInfoElement *el = &(rec->infoElements[i]);
    const struct vcfInfoDef *def = vcfInfoDefForKey(vcff, el->key);
    printf("<TR valign='top'><TD align=\"right\"><B>%s:</B></TD><TD>",
           el->key);
    int j;
    enum vcfInfoType type = def ? def->type : vcfInfoString;
    if (type == vcfInfoFlag && el->count == 0)
	printf("Yes"); // no values, so we can't call vcfPrintDatum...
    // However, if this is older VCF, type vcfInfoFlag might have a value.
    if (looksTabular(def, el))
        {
        // Make a special display below
        printf("<em>see below</em>");
        }
    else
        {
        for (j = 0;  j < el->count;  j++)
            {
            if (j > 0)
                printf(", ");
            if (el->missingData[j])
                printf(".");
            else
                vcfPrintDatum(stdout, el->values[j], type);
            }
        }
    if (def != NULL)
	printf("&nbsp;&nbsp;</TD><TD>%s", def->description);
    else
	printf("</TD><TD>");
    printf("</TD></TR>\n");
    }
puts("</TABLE>");
// Now show the tabular fields, if any
for (i = 0;  i < rec->infoCount;  i++)
    {
    struct vcfInfoElement *el = &(rec->infoElements[i]);
    const struct vcfInfoDef *def = vcfInfoDefForKey(vcff, el->key);
    if (looksTabular(def, el))
        {
        puts("<BR>");
        printf("<B>%s</B>: %s<BR>\n", el->key, def->description);
        puts("<TABLE class='stdTbl'>");
        printTabularHeaderRow(def);
        printTabularData(el);
        puts("</TABLE>");
        }
    }
}
Ejemplo n.º 2
0
static enum vcfInfoType typeForInfoKey(struct vcfFile *vcff, const char *key)
/* Return the type recorded in the INFO definition that vcfInfoDefForKey finds
 * for key.  When no definition is found, report that through vcfFileErr and
 * fall back to vcfInfoString. */
{
struct vcfInfoDef *infoDef = vcfInfoDefForKey(vcff, key);
if (infoDef != NULL)
    return infoDef->type;
vcfFileErr(vcff, "There is no INFO header defining \"%s\"", key);
// Treat undefined keys as strings so the value can be displayed as-is.
return vcfInfoString;
}
Ejemplo n.º 3
0
void vcfInfoElsToString(struct dyString *dy, struct vcfFile *vcff, struct vcfRecord *rec)
/* Unpack rec's typed infoElements to semicolon-sep'd string in dy.
 * dy is cleared first.  A record with no INFO elements yields ".".
 * Each element is written as its key, followed by '=' and comma-separated
 * values when it has any; missing values are written as ".".  Values are
 * formatted according to the type in the key's definition (looked up in vcff);
 * keys with no definition are written as strings. */
{
dyStringClear(dy);
if (rec->infoCount == 0)
    dyStringAppendC(dy, '.');
int i;
for (i = 0;  i < rec->infoCount;  i++)
    {
    if (i > 0)
        dyStringAppendC(dy, ';');
    const struct vcfInfoElement *el = &(rec->infoElements[i]);
    const struct vcfInfoDef *def = vcfInfoDefForKey(vcff, el->key);
    enum vcfInfoType type = def? def->type : vcfInfoNoType;
    dyStringAppend(dy, el->key);
    if (el->count > 0)
        dyStringAppendC(dy, '=');
    int j;
    for (j = 0;  j < el->count;  j++)
        {
        if (j > 0)
            dyStringAppendC(dy, ',');
        if (el->missingData[j])
            {
            dyStringAppend(dy, ".");
            continue;
            }
        union vcfDatum dat = el->values[j];
        switch (type)
            {
            case vcfInfoInteger:
                dyStringPrintf(dy, "%d", dat.datInt);
                break;
            case vcfInfoFloat:
                {
                // use big precision and erase trailing zeros:
                char fbuf[64];
                safef(fbuf, sizeof(fbuf), "%.16lf", dat.datFloat);
                int end;
                for (end = strlen(fbuf) - 1;  end > 0 && fbuf[end] == '0';  end--)
                    fbuf[end] = '\0';
                // If every fractional digit was zero we are left with e.g. "1.";
                // drop the dangling decimal point so the output is "1".
                if (end > 0 && fbuf[end] == '.')
                    fbuf[end] = '\0';
                dyStringAppend(dy, fbuf);
                }
                break;
            case vcfInfoCharacter:
                dyStringAppendC(dy, dat.datChar);
                break;
            case vcfInfoFlag: // Flags could have values in older VCF
            case vcfInfoNoType:
            case vcfInfoString:
                dyStringAppend(dy, dat.datString);
                break;
            default:
                errAbort("Invalid vcfInfoType %d (how did this get past parser?", type);
            }
        }
    }
}
Ejemplo n.º 4
0
static boolean minFreqFail(struct vcfRecord *record, double minFreq)
/* Return TRUE if record's INFO include AF (alternate allele frequencies) or AC+AN
 * (alternate allele counts and total count of observed alleles) and the minor allele
 * frequency < minFreq -- or rather, major allele frequency > (1 - minFreq) because
 * variants with > 2 alleles might have some significant minor frequencies along with
 * tiny minor frequencies).
 * Sources are tried in order: AF (must be defined as Float), then AC+AN (both must be
 * defined as Integer, AN a single non-missing value), then a String-typed MAF whose
 * description starts with "Minor Allele Frequency".  Returns FALSE when none of these
 * provides usable data. */
{
struct vcfFile *vcff = record->file;
boolean gotInfo = FALSE;
double refFreq = 1.0;
double maxAltFreq = 0.0;
int i;
const struct vcfInfoElement *afEl = vcfRecordFindInfo(record, "AF");
const struct vcfInfoDef *afDef = vcfInfoDefForKey(vcff, "AF");
if (afEl != NULL && afDef != NULL && afDef->type == vcfInfoFloat)
    {
    // If INFO includes alt allele freqs, use them directly.
    gotInfo = TRUE;
    for (i = 0;  i < afEl->count;  i++)
        {
        if (afEl->missingData[i])
            continue;
        double altFreq = afEl->values[i].datFloat;
        refFreq -= altFreq;
        if (altFreq > maxAltFreq)
            maxAltFreq = altFreq;
        }
    }
else
    {
    // Calculate alternate allele freqs from AC and AN:
    const struct vcfInfoElement *acEl = vcfRecordFindInfo(record, "AC");
    const struct vcfInfoDef *acDef = vcfInfoDefForKey(vcff, "AC");
    const struct vcfInfoElement *anEl = vcfRecordFindInfo(record, "AN");
    const struct vcfInfoDef *anDef = vcfInfoDefForKey(vcff, "AN");
    if (acEl != NULL && acDef != NULL && acDef->type == vcfInfoInteger &&
        anEl != NULL && anDef != NULL && anDef->type == vcfInfoInteger && anEl->count == 1 &&
        anEl->missingData[0] == FALSE)
        {
        int totalCount = anEl->values[0].datInt;
        // A non-positive allele total can't be turned into frequencies (it would
        // divide by zero), so treat it as no usable info.
        if (totalCount > 0)
            {
            gotInfo = TRUE;
            for (i = 0;  i < acEl->count;  i++)
                {
                if (acEl->missingData[i])
                    continue;
                int altCount = acEl->values[i].datInt;
                double altFreq = (double)altCount / totalCount;
                refFreq -= altFreq;
                // Track the largest alt frequency, same as the AF branch above.
                if (altFreq > maxAltFreq)
                    maxAltFreq = altFreq;
                }
            }
        }
    else
        {
        // Fall back on a string-valued MAF annotation:
        const struct vcfInfoElement *mafEl = vcfRecordFindInfo(record, "MAF");
        const struct vcfInfoDef *mafDef = vcfInfoDefForKey(vcff, "MAF");
        if (mafEl != NULL && mafDef != NULL && mafDef->type == vcfInfoString
        && startsWith("Minor Allele Frequency",mafDef->description))
            {
            gotInfo = TRUE;

            // The frequency is taken from the last whitespace-separated word of the
            // last MAF value.
            if (mafEl->count >= 1 && !mafEl->missingData[mafEl->count-1])
                {
                char data[64];
                safecpy(data,sizeof(data),mafEl->values[mafEl->count-1].datString);
                // NOTE(review): lastWordInLine's contract isn't visible here; guard
                // against a NULL result (e.g. for an empty value) before atof.
                char *lastWord = lastWordInLine(data);
                if (lastWord != NULL)
                    {
                    maxAltFreq = atof(lastWord);
                    refFreq -= maxAltFreq;
                    }
                }
            }
        }
    }
if (gotInfo)
    {
    double majorAlFreq = max(refFreq, maxAltFreq);
    if (majorAlFreq > (1.0 - minFreq))
        return TRUE;
    }
return FALSE;
}