struct meta *metaNextStanza(struct lineFile *lf) /* Return next stanza in a meta file. Does not set parent/child/next pointers. * Returns NULL at end of file. Does a little error checking, making sure * that indentation level is consistent across all lines of stanza. Returns * indentation level. */ { /* See if anything left in file, and if not return. */ if (!raSkipLeadingEmptyLines(lf, NULL)) return NULL; /* Allocate return structure and vars to help parse. */ struct meta *meta; AllocVar(meta); struct dyString *dy = dyStringNew(256); char *tag,*val; /* Loop to get all tags in stanza. */ boolean firstTime = TRUE; int initialIndent = 0; for (;;) { dyStringClear(dy); if (!raNextTagVal(lf, &tag, &val, dy)) break; /* Make tag/val and add it to list. */ struct metaTagVal *mtv; AllocVar(mtv); mtv->tag = cloneString(tag); mtv->val = cloneString(val); slAddHead(&meta->tagList, mtv); /* Check indentation. */ int indent = countLeadingSpacesDetabbing(dy->string, 8); if (firstTime) { initialIndent = indent; firstTime = FALSE; } else { if (indent != initialIndent) { warn("Error line %d of %s\n", lf->lineIx, lf->fileName); warn("Indentation level %d doesn't match level %d at start of stanza.", indent, initialIndent); if (strchr(dy->string, '\t')) warn("There are tabs in the indentation, be sure tab stop is set to 8 spaces."); noWarnAbort(); } } } slReverse(&meta->tagList); /* Set up remaining fields and return. */ assert(meta->tagList != NULL); meta->name = meta->tagList->val; meta->indent = initialIndent; return meta; }
struct slPair *raNextStanzAsPairs(struct lineFile *lf) // Return ra stanza as an slPair list instead of a hash. Handy to preserve the // order. Will ignore '#' comments and joins continued lines (ending in '\'). { struct slPair *list = NULL; char *key, *val; if (!raSkipLeadingEmptyLines(lf, NULL)) return NULL; while (raNextTagVal(lf, &key, &val, NULL)) { slPairAdd(&list, key, cloneString(val)); // key gets cloned by slPairAdd } slReverse(&list); return list; }
struct hash *raNextStanza(struct lineFile *lf) // Return a hash containing next record. // Will ignore '#' comments and joins continued lines (ending in '\'). // Returns NULL at end of file. freeHash this when done. // Note this will free the hash keys and values as well, // so you'll have to cloneMem them if you want them for later. { struct hash *hash = NULL; char *key, *val; if (!raSkipLeadingEmptyLines(lf, NULL)) return NULL; while (raNextTagVal(lf, &key, &val, NULL)) { if (hash == NULL) hash = newHash(7); hashAdd(hash, key, lmCloneString(hash->lm, val)); } return hash; }
static struct raRecord *readRecordsFromFile(struct raFile *file, struct dyString *dy, struct lm *lm) /* Read all the records in a file and return as a list. The dy parameter returns the * last bits of the file (after the last record). */ { char *fileName = file->name; struct raRecord *r, *rList = NULL; struct lineFile *lf = lineFileOpen(fileName, TRUE); while (raSkipLeadingEmptyLines(lf, dy)) { /* Create a tag structure in local memory. */ lmAllocVar(lm, r); r->startLineIx = lf->lineIx; char *name, *val; while (raNextTagVal(lf, &name, &val, dy)) { struct raTag *tag; lmAllocVar(lm, tag); tag->name = lmCloneString(lm, name); tag->val = lmCloneString(lm, val); tag->text = lmCloneString(lm, dy->string); if (sameString(name, glKeyField)) r->key = lmCloneFirstWord(lm, tag->val); slAddHead(&r->tagList, tag); dyStringClear(dy); } if (dy->stringSize > 0) { r->endComments = lmCloneString(lm, dy->string); } slReverse(&r->tagList); r->endLineIx = lf->lineIx; r->file = file; slAddHead(&rList, r); } lineFileClose(&lf); slReverse(&rList); return rList; }
struct edwFastqFile *edwFastqFileFromNextRa(struct lineFile *lf, struct raToStructReader *reader) /* Return next stanza put into an edwFastqFile. */ { enum fields { readCountField, baseCountField, readSizeMeanField, readSizeStdField, readSizeMinField, readSizeMaxField, qualMeanField, qualStdField, qualMinField, qualMaxField, qualTypeField, qualZeroField, atRatioField, aRatioField, cRatioField, gRatioField, tRatioField, nRatioField, posCountField, qualPosField, aAtPosField, cAtPosField, gAtPosField, tAtPosField, nAtPosField, }; if (!raSkipLeadingEmptyLines(lf, NULL)) return NULL; struct edwFastqFile *el; AllocVar(el); bool *fieldsObserved = reader->fieldsObserved; bzero(fieldsObserved, reader->fieldCount); char *tag, *val; while (raNextTagVal(lf, &tag, &val, NULL)) { struct hashEl *hel = hashLookup(reader->fieldIds, tag); if (hel != NULL) { int id = ptToInt(hel->val); if (fieldsObserved[id]) errAbort("Duplicate tag %s line %d of %s\n", tag, lf->lineIx, lf->fileName); fieldsObserved[id] = TRUE; switch (id) { case readCountField: { el->readCount = sqlLongLong(val); break; } case baseCountField: { el->baseCount = sqlLongLong(val); break; } case readSizeMeanField: { el->readSizeMean = sqlDouble(val); break; } case readSizeStdField: { el->readSizeStd = sqlDouble(val); break; } case readSizeMinField: { el->readSizeMin = sqlSigned(val); break; } case readSizeMaxField: { el->readSizeMax = sqlSigned(val); break; } case qualMeanField: { el->qualMean = sqlDouble(val); break; } case qualStdField: { el->qualStd = sqlDouble(val); break; } case qualMinField: { el->qualMin = sqlDouble(val); break; } case qualMaxField: { el->qualMax = sqlDouble(val); break; } case qualTypeField: { el->qualType = cloneString(val); break; } case qualZeroField: { el->qualZero = sqlSigned(val); break; } case atRatioField: { el->atRatio = sqlDouble(val); break; } case aRatioField: { el->aRatio = sqlDouble(val); break; } case cRatioField: { el->cRatio = sqlDouble(val); break; } case gRatioField: { el->gRatio = sqlDouble(val); break; } case tRatioField: { el->tRatio = sqlDouble(val); break; } case nRatioField: { el->nRatio = sqlDouble(val); break; } case posCountField: { int arraySize = sqlSigned(val); raToStructArraySignedSizer(lf, arraySize, &el->posCount, "posCount"); break; } case qualPosField: { int arraySize; sqlDoubleDynamicArray(val, &el->qualPos, &arraySize); raToStructArraySignedSizer(lf, arraySize, &el->posCount, "qualPos"); break; } case aAtPosField: { int arraySize; sqlDoubleDynamicArray(val, &el->aAtPos, &arraySize); raToStructArraySignedSizer(lf, arraySize, &el->posCount, "aAtPos"); break; } case cAtPosField: { int arraySize; sqlDoubleDynamicArray(val, &el->cAtPos, &arraySize); raToStructArraySignedSizer(lf, arraySize, &el->posCount, "cAtPos"); break; } case gAtPosField: { int arraySize; sqlDoubleDynamicArray(val, &el->gAtPos, &arraySize); raToStructArraySignedSizer(lf, arraySize, &el->posCount, "gAtPos"); break; } case tAtPosField: { int arraySize; sqlDoubleDynamicArray(val, &el->tAtPos, &arraySize); raToStructArraySignedSizer(lf, arraySize, &el->posCount, "tAtPos"); break; } case nAtPosField: { int arraySize; sqlDoubleDynamicArray(val, &el->nAtPos, &arraySize); raToStructArraySignedSizer(lf, arraySize, &el->posCount, "nAtPos"); break; } default: internalErr(); break; } } } raToStructReaderCheckRequiredFields(reader, lf); return el; }
struct edwQaWigSpot *edwQaWigSpotFromNextRa(struct lineFile *lf, struct raToStructReader *reader) /* Return next stanza put into an edwQaWigSpot. */ { enum fields { spotRatioField, enrichmentField, basesInGenomeField, basesInSpotsField, sumSignalField, spotSumSignalField, }; if (!raSkipLeadingEmptyLines(lf, NULL)) return NULL; struct edwQaWigSpot *el; AllocVar(el); bool *fieldsObserved = reader->fieldsObserved; bzero(fieldsObserved, reader->fieldCount); char *tag, *val; while (raNextTagVal(lf, &tag, &val, NULL)) { struct hashEl *hel = hashLookup(reader->fieldIds, tag); if (hel != NULL) { int id = ptToInt(hel->val); if (fieldsObserved[id]) errAbort("Duplicate tag %s line %d of %s\n", tag, lf->lineIx, lf->fileName); fieldsObserved[id] = TRUE; switch (id) { case spotRatioField: { el->spotRatio = sqlDouble(val); break; } case enrichmentField: { el->enrichment = sqlDouble(val); break; } case basesInGenomeField: { el->basesInGenome = sqlLongLong(val); break; } case basesInSpotsField: { el->basesInSpots = sqlLongLong(val); break; } case sumSignalField: { el->sumSignal = sqlDouble(val); break; } case spotSumSignalField: { el->spotSumSignal = sqlDouble(val); break; } default: internalErr(); break; } } } raToStructReaderCheckRequiredFields(reader, lf); return el; }
struct cdwBamFile *cdwBamFileFromNextRa(struct lineFile *lf, struct raToStructReader *reader) /* Return next stanza put into an cdwBamFile. */ { enum fields { isPairedField, isSortedByTargetField, readCountField, readBaseCountField, mappedCountField, uniqueMappedCountField, readSizeMeanField, readSizeStdField, readSizeMinField, readSizeMaxField, u4mReadCountField, u4mUniquePosField, u4mUniqueRatioField, targetBaseCountField, targetSeqCountField, }; if (!raSkipLeadingEmptyLines(lf, NULL)) return NULL; struct cdwBamFile *el; AllocVar(el); bool *fieldsObserved = reader->fieldsObserved; bzero(fieldsObserved, reader->fieldCount); char *tag, *val; while (raNextTagVal(lf, &tag, &val, NULL)) { struct hashEl *hel = hashLookup(reader->fieldIds, tag); if (hel != NULL) { int id = ptToInt(hel->val); if (fieldsObserved[id]) errAbort("Duplicate tag %s line %d of %s\n", tag, lf->lineIx, lf->fileName); fieldsObserved[id] = TRUE; switch (id) { case isPairedField: { el->isPaired = sqlSigned(val); break; } case isSortedByTargetField: { el->isSortedByTarget = sqlSigned(val); break; } case readCountField: { el->readCount = sqlLongLong(val); break; } case readBaseCountField: { el->readBaseCount = sqlLongLong(val); break; } case mappedCountField: { el->mappedCount = sqlLongLong(val); break; } case uniqueMappedCountField: { el->uniqueMappedCount = sqlLongLong(val); break; } case readSizeMeanField: { el->readSizeMean = sqlDouble(val); break; } case readSizeStdField: { el->readSizeStd = sqlDouble(val); break; } case readSizeMinField: { el->readSizeMin = sqlSigned(val); break; } case readSizeMaxField: { el->readSizeMax = sqlSigned(val); break; } case u4mReadCountField: { el->u4mReadCount = sqlSigned(val); break; } case u4mUniquePosField: { el->u4mUniquePos = sqlSigned(val); break; } case u4mUniqueRatioField: { el->u4mUniqueRatio = sqlDouble(val); break; } case targetBaseCountField: { el->targetBaseCount = sqlLongLong(val); break; } case targetSeqCountField: { el->targetSeqCount = sqlUnsigned(val); break; } default: internalErr(); break; } } } raToStructReaderCheckRequiredFields(reader, lf); return el; }
struct tagStorm *tagStormFromFile(char *fileName) /* Load up all tags from file. */ { int depth = 0, maxDepth = 32; int indentStack[maxDepth]; indentStack[0] = 0; /* Open up file first thing. Abort if there's a problem here. */ struct lineFile *lf = lineFileOpen(fileName, TRUE); /* Set up new empty tag storm and get local pointer to memory pool. */ struct tagStorm *tagStorm = tagStormNew(fileName); struct lm *lm = tagStorm->lm; struct tagStanza *stanza, *parent = NULL, *lastStanza = NULL; int currentIndent = 0; int stanzaCount = 0; int tagCount = 0; while (raSkipLeadingEmptyLines(lf, NULL)) { ++stanzaCount; char *tag, *val; int stanzaIndent, tagIndent; lmAllocVar(lm, stanza); struct slPair *pairList = NULL, *pair; while (raNextTagValWithIndent(lf, &tag, &val, NULL, &tagIndent)) { lmAllocVar(lm, pair); pair->name = lmCloneString(lm, tag); pair->val = lmCloneString(lm, val); if (pairList == NULL) /* If this is first tag of a new stanza check indentation * and put stanza in appropriate level of hierarchy */ { if (tagIndent != currentIndent) { stanzaIndent = tagIndent; if (stanzaIndent > currentIndent) { if (++depth >= maxDepth) errAbort("Tags nested too deep line %d of %s. Max nesting is %d", lf->lineIx, lf->fileName, maxDepth); indentStack[depth] = stanzaIndent; if (lastStanza == NULL) errAbort("Initial stanza needs to be non-indented line %d of %s", lf->lineIx, lf->fileName); parent = lastStanza; } else /* going up */ { /* Find stanza in parent chain at same level of indentation. This * will be an older sibling */ struct tagStanza *olderSibling; for (olderSibling = parent; olderSibling != NULL; olderSibling = olderSibling->parent) { --depth; if (indentStack[depth] == stanzaIndent) break; } if (olderSibling == NULL) { warn("Indentation inconsistent line %d of %s.", lf->lineIx, lf->fileName); warn("If you are using tabs, check your tab stop is set to 8."); warn("Otherwise check that when you are reducing indentation in a stanza"); warn("that it is the same as the previous stanza at the same level."); noWarnAbort(); } parent = olderSibling->parent; } currentIndent = tagIndent; } if (parent == NULL) slAddHead(&tagStorm->forest, stanza); else slAddHead(&parent->children, stanza); stanza->parent = parent; pairList = pair; lastStanza = stanza; } else { if (tagIndent != currentIndent) errAbort("Tags in stanza inconsistently indented line %d of %s", lf->lineIx, lf->fileName); slAddHead(&pairList, pair); } ++tagCount; } slReverse(&pairList); stanza->tagList = pairList; } lineFileClose(&lf); rReverseStanzaList(&tagStorm->forest); return tagStorm; }
struct cdwQaPairedEndFastq *cdwQaPairedEndFastqFromNextRa(struct lineFile *lf, struct raToStructReader *reader) /* Return next stanza put into an cdwQaPairedEndFastq. */ { enum fields { fileId1Field, concordanceField, distanceMeanField, distanceStdField, distanceMinField, distanceMaxField, }; if (!raSkipLeadingEmptyLines(lf, NULL)) return NULL; struct cdwQaPairedEndFastq *el; AllocVar(el); bool *fieldsObserved = reader->fieldsObserved; bzero(fieldsObserved, reader->fieldCount); char *tag, *val; while (raNextTagVal(lf, &tag, &val, NULL)) { struct hashEl *hel = hashLookup(reader->fieldIds, tag); if (hel != NULL) { int id = ptToInt(hel->val); if (fieldsObserved[id]) errAbort("Duplicate tag %s line %d of %s\n", tag, lf->lineIx, lf->fileName); fieldsObserved[id] = TRUE; switch (id) { case fileId1Field: { el->fileId1 = sqlUnsigned(val); break; } case concordanceField: { el->concordance = sqlDouble(val); break; } case distanceMeanField: { el->distanceMean = sqlDouble(val); break; } case distanceStdField: { el->distanceStd = sqlDouble(val); break; } case distanceMinField: { el->distanceMin = sqlDouble(val); break; } case distanceMaxField: { el->distanceMax = sqlDouble(val); break; } default: internalErr(); break; } } } raToStructReaderCheckRequiredFields(reader, lf); return el; }