struct tagStorm *tagStormFromFile(char *fileName) /* Load up all tags from file. */ { int depth = 0, maxDepth = 32; int indentStack[maxDepth]; indentStack[0] = 0; /* Open up file first thing. Abort if there's a problem here. */ struct lineFile *lf = lineFileOpen(fileName, TRUE); /* Set up new empty tag storm and get local pointer to memory pool. */ struct tagStorm *tagStorm = tagStormNew(fileName); struct lm *lm = tagStorm->lm; struct tagStanza *stanza, *parent = NULL, *lastStanza = NULL; int currentIndent = 0; int stanzaCount = 0; int tagCount = 0; while (raSkipLeadingEmptyLines(lf, NULL)) { ++stanzaCount; char *tag, *val; int stanzaIndent, tagIndent; lmAllocVar(lm, stanza); struct slPair *pairList = NULL, *pair; while (raNextTagValWithIndent(lf, &tag, &val, NULL, &tagIndent)) { lmAllocVar(lm, pair); pair->name = lmCloneString(lm, tag); pair->val = lmCloneString(lm, val); if (pairList == NULL) /* If this is first tag of a new stanza check indentation * and put stanza in appropriate level of hierarchy */ { if (tagIndent != currentIndent) { stanzaIndent = tagIndent; if (stanzaIndent > currentIndent) { if (++depth >= maxDepth) errAbort("Tags nested too deep line %d of %s. Max nesting is %d", lf->lineIx, lf->fileName, maxDepth); indentStack[depth] = stanzaIndent; if (lastStanza == NULL) errAbort("Initial stanza needs to be non-indented line %d of %s", lf->lineIx, lf->fileName); parent = lastStanza; } else /* going up */ { /* Find stanza in parent chain at same level of indentation. This * will be an older sibling */ struct tagStanza *olderSibling; for (olderSibling = parent; olderSibling != NULL; olderSibling = olderSibling->parent) { --depth; if (indentStack[depth] == stanzaIndent) break; } if (olderSibling == NULL) { warn("Indentation inconsistent line %d of %s.", lf->lineIx, lf->fileName); warn("If you are using tabs, check your tab stop is set to 8."); warn("Otherwise check that when you are reducing indentation in a stanza"); warn("that it is the same as the previous stanza at the same level."); noWarnAbort(); } parent = olderSibling->parent; } currentIndent = tagIndent; } if (parent == NULL) slAddHead(&tagStorm->forest, stanza); else slAddHead(&parent->children, stanza); stanza->parent = parent; pairList = pair; lastStanza = stanza; } else { if (tagIndent != currentIndent) errAbort("Tags in stanza inconsistently indented line %d of %s", lf->lineIx, lf->fileName); slAddHead(&pairList, pair); } ++tagCount; } slReverse(&pairList); stanza->tagList = pairList; } lineFileClose(&lf); rReverseStanzaList(&tagStorm->forest); return tagStorm; }
struct tagStorm *idfToStormTop(char *fileName) /* Convert an idf.txt format file to a tagStorm with a single top-level stanza */ { /* Create a tag storm with one as yet empty stanza */ struct tagStorm *storm = tagStormNew(fileName); struct tagStanza *stanza = tagStanzaNew(storm, NULL); /* Some stuff to help turn File_Data1, File_Data2, etc to a comma separated list */ char *additionalFilePrefix = "idf.Comment_AdditionalFile_Data"; struct dyString *additionalFileDy = dyStringNew(0); /* There can be multiple secondary accession tags, so handle these too */ char *secondaryAccessionTag = "idf.Comment_SecondaryAccession"; struct dyString *secondaryAccessionDy = dyStringNew(0); /* Parse lines from idf file into stanza */ struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line; struct dyString *dyVal = dyStringNew(0); while (lineFileNextReal(lf, &line)) { /* Erase trailing tab... */ eraseTrailingSpaces(line); /* Parse line into tab-separated array and make sure it's a reasonable size */ char *row[256]; int rowSize = chopTabs(line, row); if (rowSize == ArraySize(row)) errAbort("Line %d of %s has too many fields", lf->lineIx, lf->fileName); if (rowSize < 2) continue; /* Convert first element to tagName */ char tagName[256]; aeFieldToNormalField("idf.", trimSpaces(row[0]), tagName, sizeof(tagName)); /* Special case where we already are a comma separated list */ if (sameString(tagName, "idf.Publication_Author_List")) { tagStanzaAppend(storm, stanza, tagName, row[1]); } else if (startsWith(additionalFilePrefix, tagName)) { csvEscapeAndAppend(additionalFileDy, row[1]); } else if (sameString(secondaryAccessionTag, tagName)) { csvEscapeAndAppend(secondaryAccessionDy, row[1]); } else { /* Convert rest of elements to possibly comma separated values */ dyStringClear(dyVal); int i; for (i=1; i<rowSize; ++i) csvEscapeAndAppend(dyVal, row[i]); tagStanzaAppend(storm, stanza, tagName, dyVal->string); } } if (additionalFileDy->stringSize != 0) tagStanzaAppend(storm, stanza, additionalFilePrefix, additionalFileDy->string); if (secondaryAccessionDy->stringSize != 0) tagStanzaAppend(storm, stanza, secondaryAccessionTag, secondaryAccessionDy->string); dyStringFree(&secondaryAccessionDy); dyStringFree(&additionalFileDy); dyStringFree(&dyVal); lineFileClose(&lf); return storm; }