void readPairFile(struct lineFile *prf) /* Read in pairs and initialize clone list */ { int lineSize, i; char *line; char *words[4]; char *names[16]; int wordCount, nameCount; struct clone *clone; struct cloneName *cloneName; while (lineFileNext(prf, &line, &lineSize)) { wordCount = chopTabs(line,words); if (wordCount != 3) errAbort("Bad line %d of %s\n", prf->lineIx, prf->fileName); if (!hashLookup(clones, words[2])) { clone = createClone(words[2],NULL,NULL); hashAdd(clones, words[2], clone); slAddHead(&cloneList,clone); } AllocVar(cloneName); sprintf(cloneName->name, "%s", words[2]); nameCount = chopCommas(words[0],names); for (i = 0; i < nameCount; i++) hashAdd(leftNames, names[i], cloneName); nameCount = chopCommas(words[1],names); for (i = 0; i < nameCount; i++) hashAdd(rightNames, names[i], cloneName); } }
int findBedSize(char *fileName, struct lineFile **retLf) /* Read first line of file and figure out how many words in it. */ /* Input file could be stdin, in which case we really don't want to open, * read, and close it here. So if retLf is non-NULL, return the open * linefile (having told it to reuse the line we just read). */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); char *words[64], *line; int wordCount; if (!lineFileNextReal(lf, &line)) if (ignoreEmpty) return(0); line = cloneString(line); if (strictTab) wordCount = chopTabs(line, words); else wordCount = chopLine(line, words); if (wordCount == 0) errAbort("%s appears to be empty", fileName); if (retLf != NULL) { lineFileReuse(lf); *retLf = lf; } else lineFileClose(&lf); freeMem(line); return wordCount; }
void readHugoMultiTable(char *fileName, struct hugoMulti **retList, struct hash **retIdHash, struct hash **retSymbolHash) /* Read in file into list and hashes. Make hash keyed on omim ID * and on OMIM symbol. */ { struct hash *idHash = newHash(0); struct hash *symbolHash = newHash(0); struct hugoMulti *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *words[16]; char *line; int lineSize, wordCount; char *name; while (lineFileNext(lf, &line, &lineSize)) { if (line[0] == 0 || line[0] == '#') continue; wordCount = chopTabs(line, words); lineFileExpectWords(lf, 11, wordCount); el = hugoMultiLoad(words); slAddHead(&list, el); name = el->omimId; if (name[0] != 0) hashAdd(idHash, name, el); name = el->symbol; if (name[0] != 0) hashAdd(symbolHash, name, el); } lineFileClose(&lf); slReverse(&list); *retList = list; *retIdHash = idHash; *retSymbolHash = symbolHash; }
struct psl *nextLmPsl(struct lineFile *lf, struct lm *lm) /* Read next line from file and convert it to psl. Return * NULL at eof. */ { char *line; int lineSize; char *words[32]; int wordCount; if (!lineFileNext(lf, &line, &lineSize)) return NULL; wordCount = chopTabs(line, words); if (wordCount == 21) { return pslLoadLm(words, lm); } else if (wordCount == 23) { return pslxLoadLm(words, lm); } else { warn("Bad line %d of %s", lf->lineIx, lf->fileName); return NULL; } }
struct psl *nextPsl(struct lineFile *lf) /* Read next line from file and convert it to psl. Return * NULL at eof. */ { char *line; int lineSize; char *words[32]; int wordCount; if (!lineFileNext(lf, &line, &lineSize)) { //warn("File %s appears to be incomplete\n", lf->fileName); return NULL; } wordCount = chopTabs(line, words); if (wordCount == 21) { return pslLoad(words); } else if (wordCount == 23) { return pslxLoad(words); } else { warn("Bad line %d of %s", lf->lineIx, lf->fileName); return NULL; } }
boolean mgcStatusTblCopyRow(struct lineFile *inLf, FILE *outFh) /* read a copy one row of a status table tab file without * fully parsing. Expand if optional fields are missing */ { char *line; int numCols, i; char *row[MGCSTATUS_NUM_COLS]; if (!lineFileNextReal(inLf, &line)) return FALSE; numCols = chopTabs(line, row); numCols = min(numCols, MGCSTATUS_NUM_COLS); lineFileExpectAtLeast(inLf, MGCSTATUS_MIN_NUM_COLS, numCols); for (i = 0; i < numCols; i++) { if (i > 0) fputc('\t', outFh); fputs(row[i], outFh); } /* pad */ for (; i < MGCSTATUS_NUM_COLS; i++) fputc('\t', outFh); fputc('\n', outFh); return TRUE; }
void readPslFile(struct lineFile *pf) /* Process all records in a psl file of mRNA alignments */ { int lineSize; char *line; char *words[32]; int wordCount; struct psl *psl; struct clone *clone; struct pslAli *pa = NULL; struct cloneName *cloneName; while (lineFileNext(pf, &line, &lineSize)) { wordCount = chopTabs(line, words); if (wordCount != 21) errAbort("Bad line %d of %s\n", pf->lineIx, pf->fileName); psl = pslLoad(words); if (hashLookup(leftNames, psl->qName)) cloneName = hashMustFindVal(leftNames, psl->qName); else if (hashLookup(rightNames, psl->qName)) cloneName = hashMustFindVal(rightNames, psl->qName); else continue; clone = hashMustFindVal(clones, cloneName->name); if ((psl->tBaseInsert < TINSERT) && ((!NORANDOM) || (strlen(psl->tName) < 7))) { pa = createPslAli(psl); if (hashLookup(leftNames, psl->qName)) slAddHead(&(clone->end1), pa); else slAddHead(&(clone->end2), pa); } } }
void fixGdup(char *inName, char *outName)
/* fixGdup - Reformat genomic dups table a little..
 * Copies 15-column input rows to outName, merging fields 6 and 7 into a
 * single "6:7" field and dropping fields 3 and 9. */
{
struct lineFile *lf = lineFileOpen(inName, TRUE);
FILE *f = mustOpen(outName, "w");
int wordCount, lineSize;
char *words[32], *line;
int i;

while (lineFileNext(lf, &line, &lineSize))
    {
    if (line[0] == '#')
        continue;
    wordCount = chopTabs(line, words);
    if (wordCount == 0)
        continue;
    lineFileExpectWords(lf, 15, wordCount);
    /* First three fields pass through unchanged. */
    for (i=0; i<3; ++i)
        fprintf(f, "%s\t", words[i]);
    /* Combine fields 6 and 7 into one colon-joined field. */
    fprintf(f, "%s:%s\t", words[6], words[7]);
    for (i=4; i<9; ++i)
        fprintf(f, "%s\t", words[i]);
    /* Remaining fields, tab separated, newline after the last. */
    for (i=10; i<wordCount; ++i)
        {
        fprintf(f, "%s", words[i]);
        if (i == wordCount-1)
            fprintf(f, "\n");
        else
            fprintf(f, "\t");
        }
    }
/* Previously neither handle was closed, so buffered output could be
 * lost and the lineFile leaked. */
lineFileClose(&lf);
carefulClose(&f);
}
void gffFileAdd(struct gffFile *gff, char *fileName, int baseOffset) /* Create a gffFile structure from a GFF file. */ { /* Open file and do basic allocations. */ struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line, *words[9]; int lineSize, wordCount; while (lineFileNext(lf, &line, &lineSize)) { if (line[0] != '#') { wordCount = chopTabs(line, words); if (wordCount > 0) gffFileAddRow(gff, baseOffset, words, wordCount, lf->fileName, lf->lineIx); } } slReverse(&gff->lineList); slReverse(&gff->seqList); slReverse(&gff->sourceList); slReverse(&gff->featureList); slReverse(&gff->groupList); slReverse(&gff->geneIdList); lineFileClose(&lf); }
void writeBedTab(char *fileName, struct bedStub *bedList, int bedSize) /* Write out bed list to tab-separated file. */ { struct bedStub *bed; FILE *f = mustOpen(fileName, "w"); char *words[64]; int i, wordCount; for (bed = bedList; bed != NULL; bed = bed->next) { if (!noBin) if (fprintf(f, "%u\t", hFindBin(bed->chromStart, bed->chromEnd)) <= 0) writeFailed(fileName); if (strictTab) wordCount = chopTabs(bed->line, words); else wordCount = chopLine(bed->line, words); for (i=0; i<wordCount; ++i) { /* new definition for old "reserved" field, now itemRgb */ /* and when itemRgb, it is a comma separated string r,g,b */ if (itemRgb && (i == 8)) { char *comma; /* Allow comma separated list of rgb values here */ comma = strchr(words[8], ','); if (comma) { int itemRgb = 0; if (-1 == (itemRgb = bedParseRgb(words[8]))) errAbort("ERROR: expecting r,g,b specification, " "found: '%s'", words[8]); else if (fprintf(f, "%d", itemRgb) <= 0) writeFailed(fileName); verbose(2, "itemRgb: %s, rgb: %#x\n", words[8], itemRgb); } else if (fputs(words[i], f) == EOF) writeFailed(fileName); } else if (fputs(words[i], f) == EOF) writeFailed(fileName); if (i == wordCount-1) { if (fputc('\n', f) == EOF) writeFailed(fileName); } else if (fputc('\t', f) == EOF) writeFailed(fileName); } } fclose(f); }
void pslReps(char *inName, char *bestAliName, char *repName)
/* Analyse inName and put best alignments for each mRNA in bestAliName.
 * Put repeat info in repName.  Input must be grouped (sorted) by query
 * name; each group of alignments is handed to doOneAcc as a unit. */
{
struct lineFile *in = pslFileOpen(inName);
FILE *bestFile = mustOpen(bestAliName, "w");
FILE *repFile = mustOpen(repName, "w");
int lineSize;
char *line;
char *words[32];
int wordCount;
struct psl *pslList = NULL, *psl = NULL;
char lastName[512];
int aliCount = 0;

/* Suppress progress chatter when either output goes to stdout, since
 * dots would corrupt the data stream. */
quiet = sameString(bestAliName, "stdout") || sameString(repName, "stdout");
if (coverQSizeFile != NULL)
    loadCoverQSizes(coverQSizeFile);
if (!quiet)
    printf("Processing %s to %s and %s\n", inName, bestAliName, repName);
if (!noHead)
    pslWriteHead(bestFile);
strcpy(lastName, "");
while (lineFileNext(in, &line, &lineSize))
    {
    /* Progress dot every 128K alignments. */
    if (((++aliCount & 0x1ffff) == 0) && !quiet)
        {
        printf(".");
        fflush(stdout);
        }
    wordCount = chopTabs(line, words);
    /* 21 columns is plain psl, 23 is pslx (psl with sequence). */
    if (wordCount == 21)
        psl = pslLoad(words);
    else if (wordCount == 23)
        psl = pslxLoad(words);
    else
        errAbort("Bad line %d of %s\n", in->lineIx, in->fileName);
    /* Name change marks a new group: flush the accumulated one.
     * NOTE(review): the very first iteration calls doOneAcc with an
     * empty name and NULL list — presumably a no-op; confirm doOneAcc
     * tolerates that. */
    if (!sameString(lastName, psl->qName))
        {
        doOneAcc(lastName, pslList, bestFile, repFile);
        pslFreeList(&pslList);
        safef(lastName, sizeof(lastName), "%s", psl->qName);
        }
    slAddHead(&pslList, psl);
    }
/* Flush the final group. */
doOneAcc(lastName, pslList, bestFile, repFile);
pslFreeList(&pslList);
lineFileClose(&in);
fclose(bestFile);
fclose(repFile);
if (!quiet)
    printf("Processed %d alignments\n", aliCount);
}
void processPrimers(struct lineFile *pf, FILE *of) /* Read and process isPCR file and sts locations */ { int lineSize, wordCount; char *line; char *words[21]; char *dbsts_name, *dbsts[4], *currDbsts; struct sts *sts=NULL; struct psl *psl; struct place *place; currDbsts = "\0"; while (lineFileNext(pf, &line, &lineSize)) { wordCount = chopTabs(line, words); if (wordCount != 21) errAbort("Bad line %d of %s\n", pf->lineIx, pf->fileName); psl = pslLoad(words); dbsts_name = cloneString(psl->qName); wordCount = chopByChar(dbsts_name, '_', dbsts, ArraySize(dbsts)); if (differentString(dbsts[1], currDbsts)) { if (sts != NULL) { filterPrimersAndWrite(of, sts); /* stsFree(&sts); */ freez(&currDbsts); } currDbsts = cloneString(dbsts[1]); sts = NULL; if (hashLookup(stsHash, dbsts[1])) sts = hashMustFindVal(stsHash, dbsts[1]); } if (sts) { AllocVar(place); /* Check if this psl record is already present */ if (!pslInList(place->psl, psl)) { slAddHead(&place->psl, psl); place->unali = calcUnali(sts, psl); place->sizeDiff = calcSizeDiff(sts, psl); place->badBits = calcBadBits(place); if (place->sizeDiff < (200 - (place->badBits * 50))) slAddHead(&sts->place, place); else placeFree(&place); } } } if (sts != NULL) filterPrimersAndWrite(of, sts); }
boolean gffHasGtfGroup(char *line)
/* Return TRUE if line has a GTF group field */
{
char *copy = cloneString(line);
char *cols[10];
boolean result = FALSE;

/* Chop a private copy so the caller's line is left untouched; a GTF
 * line has at least 9 tab-separated columns with the group in column 9. */
if (chopTabs(copy, cols) >= 9 && isGtfGroup(cols[8]))
    result = TRUE;
freeMem(copy);
return result;
}
char* restField(struct bigBedInterval *bb, int fieldIdx) /* return a given field from the bb->rest field, NULL on error */ { if (fieldIdx==0) // we don't return the first(=name) field of bigBed return NULL; char *rest = cloneString(bb->rest); char *restFields[256]; int restCount = chopTabs(rest, restFields); char *field = NULL; if (fieldIdx < restCount) field = cloneString(restFields[fieldIdx]); freeMem(rest); return field; }
struct mgcStatusTbl *mgcStatusTblLoad(char *mgcStatusTab, unsigned opts)
/* Load a mgcStatusTbl object from a tab file */
{
struct mgcStatusTbl *tbl = mgcStatusTblNew(opts);
struct lineFile *lf = lineFileOpen(mgcStatusTab, TRUE);
char *row[MGCSTATUS_NUM_COLS];
char *line;

/* Parse each data line and add it to the table. */
while (lineFileNextReal(lf, &line))
    {
    int colCount = chopTabs(line, row);
    lineFileExpectAtLeast(lf, MGCSTATUS_MIN_NUM_COLS, colCount);
    loadRow(tbl, lf, row, colCount);
    }
lineFileClose(&lf);
return tbl;
}
void loadOneBed(struct lineFile *lf, int bedSize, struct bedStub **pList) /* Load one bed file. Make sure all lines have bedSize fields. * Put results in *pList. */ { char *words[64], *line, *dupe; int wordCount; struct bedStub *bed; verbose(1, "Reading %s\n", lf->fileName); while (lineFileNextReal(lf, &line)) { if (hasBin) nextWord(&line); dupe = cloneString(line); if (strictTab) wordCount = chopTabs(line, words); else wordCount = chopLine(line, words); /* ignore empty lines */ if (0 == wordCount) continue; lineFileExpectWords(lf, bedSize, wordCount); AllocVar(bed); bed->chrom = cloneString(words[0]); bed->chromStart = lineFileNeedNum(lf, words, 1); bed->chromEnd = lineFileNeedNum(lf, words, 2); if (! noStrict) { if (bed->chromEnd < 1) errAbort("ERROR: line %d:'%s'\nchromEnd is less than 1\n", lf->lineIx, dupe); if (bed->chromStart == bed->chromEnd && !allowStartEqualEnd) errAbort("ERROR: line %d:'%s'\nchromStart == chromEnd (%d) (zero-length item)\n" "Use -allowStartEqualEnd if that is legit (e.g. for insertion point).\n", lf->lineIx, dupe, bed->chromStart); if (bed->chromStart > bed->chromEnd) errAbort("ERROR: line %d:'%s'\nchromStart after chromEnd (%d > %d)\n", lf->lineIx, dupe, bed->chromStart, bed->chromEnd); } bed->line = dupe; slAddHead(pList, bed); } }
void verifyJoinedFormat(char *s) /* Verify that s consists of lines with two tab-separated fields, * and that the second field has some n/a and some comma-separated lists. */ { char *e; int lineIx = 0; boolean gotCommas = FALSE, gotNa = FALSE; while (s != NULL && s[0] != 0) { char *row[3]; int fieldCount; ++lineIx; e = strchr(s, '\n'); if (e != NULL) *e++ = 0; if (s[0] != '#') { fieldCount = chopTabs(s, row); if (fieldCount != 2) { qaStatusSoftError(tablesTestList->status, "Got %d fields line %d of joined result, expected 2", fieldCount, lineIx); break; } if (sameString(row[1], "n/a")) gotNa = TRUE; if (countChars(s, ',') >= 2) gotCommas = TRUE; } s = e; } if (!gotCommas) qaStatusSoftError(tablesTestList->status, "Expected some rows in join to have comma separated lists."); if (!gotNa) qaStatusSoftError(tablesTestList->status, "Expected some rows in joint to have n/a."); }
void motifFinder(char *database, char *name, int fileCount, char *files[]) /* motifFinder - find largest scoring motif in bed items. */ { struct sqlConnection *conn = sqlConnect(database); int fileNum; char where[256]; struct chromInfo *ci = createChromInfoList(NULL, database); sqlSafefFrag(where, sizeof(where), "name = '%s'", name); struct dnaMotif *motif = dnaMotifLoadWhere(conn, motifTable, where); if(markovTable != NULL) dnaMotifMakeLog2(motif); if(motif == NULL) errAbort("couldn't find motif '%s'", name); for (fileNum = 0; fileNum < fileCount; fileNum++) { char *words[64], *line; char **row; struct lineFile *lf = lineFileOpen(files[fileNum], TRUE); while (lineFileNextReal(lf, &line)) { int dnaLength, i, j, rowOffset, length, wordCount = chopTabs(line, words); unsigned chromSize; boolean markovFound = FALSE; double mark0[5]; double mark2[5][5][5]; struct dnaSeq *seq = NULL; char *dupe = NULL; if (0 == wordCount) continue; lineFileExpectAtLeast(lf, 3, wordCount); dupe = cloneString(line); char *chrom = words[0]; int chromStart = lineFileNeedNum(lf, words, 1); if(markovTable != NULL) chromStart = max(2, chromStart); unsigned chromEnd = lineFileNeedNum(lf, words, 2); if (chromEnd < 1) errAbort("ERROR: line %d:'%s'\nchromEnd is less than 1\n", lf->lineIx, dupe); if (chromStart > chromEnd) errAbort("ERROR: line %d:'%s'\nchromStart after chromEnd (%d > %d)\n", lf->lineIx, dupe, chromStart, chromEnd); length = chromEnd - chromStart; chromSize = getChromSize(ci, chrom); if(markovTable == NULL) { dnaLength = length; seq = hDnaFromSeq(database, chrom, chromStart, chromEnd, dnaUpper); if(uniformBackground) { int i; mark0[0] = 1; for(i = 1; i <= 4; i++) mark0[i] = 0.25; } else { dnaMark0(seq, mark0, NULL); } } else { dnaLength = length + 4; if(chromStart - 2 + dnaLength > chromSize) // can't do analysis for potential peak hanging off the end of the chrom continue; seq = hDnaFromSeq(database, chrom, chromStart - 2, chromEnd + 2, dnaUpper); struct sqlResult *sr = 
hRangeQuery(conn, markovTable, chrom, chromStart, chromStart + 1, NULL, &rowOffset); if((row = sqlNextRow(sr)) != NULL) { dnaMark2Deserialize(row[rowOffset + 3], mark2); dnaMarkMakeLog2(mark2); markovFound = TRUE; } else errAbort("markov table '%s' is missing; non-markov analysis is current not supported", markovTable); sqlFreeResult(&sr); } struct bed6FloatScore *hits = NULL; for (i = 0; i < 2; i++) { double mark0Copy[5]; char strand = i == 0 ? '+' : '-'; for (j = 0; j <= 4; j++) mark0Copy[j] = mark0[j]; if(strand == '-') { // reverse markov table too! double tmp; reverseComplement(seq->dna, dnaLength); tmp = mark0Copy[1]; mark0Copy[1] = mark0Copy[3]; mark0Copy[3] = tmp; tmp = mark0Copy[2]; mark0Copy[2] = mark0Copy[4]; mark0Copy[4] = tmp; } for (j = 0; j < length - motif->columnCount + 1; j++) // tricky b/c if(markovFound) then seq includes the two bytes on either side of actual sequence. { double score; if(markovFound) score = dnaMotifBitScoreWithMarkovBg(motif, seq->dna + j, mark2); else score = dnaMotifBitScoreWithMark0Bg(motif, seq->dna + j, mark0Copy); if(score >= minScoreCutoff) { int start; if(strand == '-') start = (chromEnd - j) - motif->columnCount; else start = chromStart + j; struct bed6FloatScore *hit = NULL; // Watch out for overlapping hits (on either strand; yes, I've seen that happen); // we report only the highest scoring hit in this case. // O(n^2) where n == number of motifs in a peak, but I expect n to be almost always very small. if(!originalCoordinates) { for (hit = hits; hit != NULL; hit = hit->next) { if(hit->chromEnd > start && hit->chromStart <= (start + motif->columnCount)) { verbose(3, "found overlapping hits: %d-%d overlaps with %d-%d\n", start, start + motif->columnCount, hit->chromStart, hit->chromEnd); break; } } } if(hit == NULL || hit->score < score) { if(hit == NULL) { AllocVar(hit); slAddHead(&hits, hit); hit->chrom = cloneString(chrom); } hit->chromStart = originalCoordinates ? 
chromStart : start; hit->chromEnd = originalCoordinates ? chromEnd : start + motif->columnCount; hit->score = score; hit->strand[0] = strand; } } verbose(3, "j: %d; score: %.2f\n", j, score); } } slSort(&hits, bed6FloatCmpDesc); int count; float currentPrior = prior; for(count = 1; hits != NULL; count++, hits = hits->next) { if(topOnly && count > topOnly) break; // Use a progressively weaker prior for hits with lower scores verbose(3, "count: %d; score: %.2f; prior: %.2f; log2(prior / (1 - prior)): %.2f\n", count, hits->score, currentPrior, log2(currentPrior / (1 - currentPrior))); if(hits->score >= minScoreCutoff - log2(currentPrior / (1 - currentPrior))) { printf("%s\t%d\t%d\t%s\t%.2f\t%c\n", chrom, originalCoordinates ? chromStart : hits->chromStart, originalCoordinates ? chromEnd : hits->chromStart + motif->columnCount, name, hits->score, hits->strand[0]); currentPrior = count == 1 ? priorBackoff : currentPrior * priorBackoff; if(count > 2) verbose(3, "hit for count: %d at %s:%d-%d\n", count, chrom, hits->chromStart, hits->chromStart + motif->columnCount); } else break; } freeDnaSeq(&seq); freeMem(dupe); } lineFileClose(&lf); } sqlDisconnect(&conn); }
static void bigBedClick(char *fileName, struct trackDb *tdb, char *item, int start, int end, int bedSize)
/* Handle click in generic bigBed track: find the clicked item in the
 * bigBed file and print its details page (position, extra fields, and
 * optionally seq1/seq2 columns for pairedTagAlign-style tracks). */
{
boolean showUrl = FALSE;
char *chrom = cartString(cart, "c");

/* Open BigWig file and get interval list. */
struct bbiFile *bbi = bigBedFileOpen(fileName);
struct lm *lm = lmInit(0);
int ivStart = start, ivEnd = end;
if (start == end)
    {
    // item is an insertion; expand the search range from 0 bases to 2 so we catch it:
    ivStart = max(0, start-1);
    ivEnd++;
    }
struct bigBedInterval *bbList = bigBedIntervalQuery(bbi, chrom, ivStart, ivEnd, 0, lm);

/* Get bedSize if it's not already defined. */
if (bedSize == 0)
    {
    bedSize = bbi->definedFieldCount;
    showUrl = TRUE;
    }

/* Optional minimum-score filter from cart/trackDb. */
char *scoreFilter = cartOrTdbString(cart, tdb, "scoreFilter", NULL);
int minScore = 0;
if (scoreFilter)
    minScore = atoi(scoreFilter);

/* Find particular item in list - matching start, and item if possible. */
boolean found = FALSE;
/* NOTE(review): firstTime is never set FALSE, so the <BR> prints for
 * every matching interval — confirm whether that is intended. */
boolean firstTime = TRUE;
struct bigBedInterval *bb;
for (bb = bbList; bb != NULL; bb = bb->next)
    {
    /* Require an exact coordinate match. */
    if (!(bb->start == start && bb->end == end))
	continue;
    /* When the track has a name column, require a name match too. */
    if (bedSize > 3)
	{
	char *name = cloneFirstWordByTab(bb->rest);
	boolean match = sameString(name, item);
	freez(&name);
	if (!match)
	    continue;
	}
    found = TRUE;
    if (firstTime)
	printf("<BR>\n");
    int seq1Seq2Fields = 0;
    // check for seq1 and seq2 in columns 7+8 (eg, pairedTagAlign)
    boolean seq1Seq2 = sameOk(trackDbSetting(tdb, BASE_COLOR_USE_SEQUENCE), "seq1Seq2");
    if (seq1Seq2 && bedSize == 6)
	seq1Seq2Fields = 2;
    char *fields[bedSize+seq1Seq2Fields];
    char startBuf[16], endBuf[16];
    char *rest = cloneString(bb->rest);
    int bbFieldCount = bigBedIntervalToRow(bb, chrom, startBuf, endBuf, fields,
					   bedSize+seq1Seq2Fields);
    if (bbFieldCount != bedSize+seq1Seq2Fields)
	{
	errAbort("Disagreement between trackDb field count (%d) and %s fieldCount (%d)",
		 bedSize, fileName, bbFieldCount);
	}
    struct bed *bed = bedLoadN(fields, bedSize);
    if (bedSize >= 6 && scoreFilter && bed->score < minScore)
	continue;
    if (showUrl && (bedSize >= 4))
	printCustomUrl(tdb, item, TRUE);
    bedPrintPos(bed, bedSize, tdb);
    // display seq1 and seq2
    if (seq1Seq2 && bedSize+seq1Seq2Fields == 8)
	printf("<table><tr><th>Sequence 1</th><th>Sequence 2</th></tr>"
	       "<tr><td> %s </td><td> %s </td></tr></table>", fields[6], fields[7]);
    else if (isNotEmpty(rest))
	{
	/* Print any fields beyond the standard BED columns. */
	char *restFields[256];
	int restCount = chopTabs(rest, restFields);
	int restBedFields = bedSize - 3;
	if (restCount > restBedFields)
	    {
	    /* extraFieldsPrint returns 0 when it had no field
	     * definitions to use; fall back to a raw tab dump. */
	    if (0 == extraFieldsPrint(tdb,NULL,restFields + restBedFields,restCount - restBedFields))
		{
		int i;
		char label[20];
		safef(label, sizeof(label), "nonBedFieldsLabel");
		printf("<B>%s </B>", trackDbSettingOrDefault(tdb, label, "Non-BED fields:"));
		for (i = restBedFields; i < restCount; i++)
		    printf("%s%s", (i > 0 ? "\t" : ""), restFields[i]);
		printf("<BR>\n");
		}
	    }
	}
    /* For custom tracks, show when the underlying file last changed. */
    if (isCustomTrack(tdb->track))
	{
	time_t timep = bbiUpdateTime(bbi);
	printBbiUpdateTime(&timep);
	}
    }
if (!found)
    {
    printf("No item %s starting at %d\n", emptyForNull(item), start);
    }
lmCleanup(&lm);
bbiFileClose(&bbi);
}
void processRefSeq(char *database, char *faFile, char *raFile, char *pslFile,
	char *loc2refFile, char *pepFile, char *mim2locFile)
/* hgRefSeqMrna - Load refSeq mRNA alignments and other info into
 * refSeqGene table.  Phases: (1) recreate the destination tables,
 * (2) hash mim2loc, (3) build refSeqInfo records from the .ra file,
 * (4) fill in locus/omim/protein ids from loc2ref, (5) write refLink,
 * (6) convert psl alignments to gene rows, (7) load everything. */
{
struct lineFile *lf;
struct hash *raHash, *rsiHash = newHash(0);
struct hash *loc2mimHash = newHash(0);
struct refSeqInfo *rsiList = NULL, *rsi;
char *s, *line, *row[5];
int wordCount, dotMod = 0;
int noLocCount = 0;
int rsiCount = 0;
int noProtCount = 0;
struct psl *psl;
struct sqlConnection *conn = hgStartUpdate(database);
struct hash *productHash = loadNameTable(conn, "productName", 16);
struct hash *geneHash = loadNameTable(conn, "geneName", 16);
char *kgName = "refGene";
FILE *kgTab = hgCreateTabFile(".", kgName);
FILE *productTab = hgCreateTabFile(".", "productName");
FILE *geneTab = hgCreateTabFile(".", "geneName");
FILE *refLinkTab = hgCreateTabFile(".", "refLink");
FILE *refPepTab = hgCreateTabFile(".", "refPep");
FILE *refMrnaTab = hgCreateTabFile(".", "refMrna");
struct exon *exonList = NULL, *exon;
char *answer;
char cond_str[200];

/* Make refLink and other tables table if they don't exist already,
 * then empty them for a clean reload. */
sqlMaybeMakeTable(conn, "refLink", refLinkTableDef);
sqlUpdate(conn, "NOSQLINJ delete from refLink");
sqlMaybeMakeTable(conn, "refGene", refGeneTableDef);
sqlUpdate(conn, "NOSQLINJ delete from refGene");
sqlMaybeMakeTable(conn, "refPep", refPepTableDef);
sqlUpdate(conn, "NOSQLINJ delete from refPep");
sqlMaybeMakeTable(conn, "refMrna", refMrnaTableDef);
sqlUpdate(conn, "NOSQLINJ delete from refMrna");

/* Scan through locus link to omim ID file and put in hash. */
    {
    char *row[2];
    printf("Scanning %s\n", mim2locFile);
    lf = lineFileOpen(mim2locFile, TRUE);
    while (lineFileRow(lf, row))
	{
	/* Key is locus id, value is omim id packed into a pointer. */
	hashAdd(loc2mimHash, row[1], intToPt(atoi(row[0])));
	}
    lineFileClose(&lf);
    }

/* Scan through .ra file and make up start of refSeqInfo
 * objects in hash and list. */
printf("Scanning %s\n", raFile);
lf = lineFileOpen(raFile, TRUE);
while ((raHash = hashNextRa(lf)) != NULL)
    {
    /* Progress dot every clDots records. */
    if (clDots > 0 && ++dotMod == clDots )
	{
	dotMod = 0;
	dotOut();
	}
    AllocVar(rsi);
    slAddHead(&rsiList, rsi);
    if ((s = hashFindVal(raHash, "acc")) == NULL)
	errAbort("No acc near line %d of %s", lf->lineIx, lf->fileName);
    rsi->mrnaAcc = cloneString(s);
    if ((s = hashFindVal(raHash, "siz")) == NULL)
	errAbort("No siz near line %d of %s", lf->lineIx, lf->fileName);
    rsi->size = atoi(s);
    if ((s = hashFindVal(raHash, "gen")) != NULL)
	rsi->geneName = cloneString(s);
    //!!!else
    //!!!    warn("No gene name for %s", rsi->mrnaAcc);
    /* Without an explicit cds field, treat the whole mRNA as coding. */
    if ((s = hashFindVal(raHash, "cds")) != NULL)
	parseCds(s, 0, rsi->size, &rsi->cdsStart, &rsi->cdsEnd);
    else
	rsi->cdsEnd = rsi->size;
    if ((s = hashFindVal(raHash, "ngi")) != NULL)
	rsi->ngi = atoi(s);
    rsi->geneNameId = putInNameTable(geneHash, geneTab, rsi->geneName);
    s = hashFindVal(raHash, "pro");
    if (s != NULL)
	rsi->productName = cloneString(s);
    rsi->productNameId = putInNameTable(productHash, productTab, s);
    hashAdd(rsiHash, rsi->mrnaAcc, rsi);
    freeHashAndVals(&raHash);
    }
lineFileClose(&lf);
if (clDots) printf("\n");

/* Scan through loc2ref filling in some gaps in rsi. */
printf("Scanning %s\n", loc2refFile);
lf = lineFileOpen(loc2refFile, TRUE);
while (lineFileNext(lf, &line, NULL))
    {
    char *mrnaAcc;
    if (line[0] == '#')
	continue;
    wordCount = chopTabs(line, row);
    if (wordCount < 5)
	errAbort("Expecting at least 5 tab-separated words line %d of %s",
		 lf->lineIx, lf->fileName);
    mrnaAcc = row[1];
    mrnaAcc = accWithoutSuffix(mrnaAcc);
    /* RefSeq accessions look like NM_123456; sanity-check the shape. */
    if (mrnaAcc[2] != '_')
	warn("%s is and odd name %d of %s", mrnaAcc, lf->lineIx, lf->fileName);
    if ((rsi = hashFindVal(rsiHash, mrnaAcc)) != NULL)
	{
	rsi->locusLinkId = lineFileNeedNum(lf, row, 0);
	rsi->omimId = ptToInt(hashFindVal(loc2mimHash, row[0]));
	rsi->proteinAcc = cloneString(accWithoutSuffix(row[4]));
	}
    }
lineFileClose(&lf);

/* Report how many seem to be missing from loc2ref file.
 * Write out knownInfo file. */
printf("Writing %s\n", "refLink.tab");
for (rsi = rsiList; rsi != NULL; rsi = rsi->next)
    {
    ++rsiCount;
    if (rsi->locusLinkId == 0)
	++noLocCount;
    if (rsi->proteinAcc == NULL)
	++noProtCount;
    fprintf(refLinkTab, "%s\t%s\t%s\t%s\t%u\t%u\t%u\t%u\n",
	emptyForNull(rsi->geneName), emptyForNull(rsi->productName),
	emptyForNull(rsi->mrnaAcc), emptyForNull(rsi->proteinAcc),
	rsi->geneNameId, rsi->productNameId,
	rsi->locusLinkId, rsi->omimId);
    }
if (noLocCount)
    printf("Missing locusLinkIds for %d of %d\n", noLocCount, rsiCount);
if (noProtCount)
    printf("Missing protein accessions for %d of %d\n", noProtCount, rsiCount);

/* Process alignments and write them out as genes. */
lf = pslFileOpen(pslFile);
dotMod = 0;
while ((psl = pslNext(lf)) != NULL)
    {
    if (hashFindVal(rsiHash, psl->qName) != NULL)
	{
	if (clDots > 0 && ++dotMod == clDots )
	    {
	    dotMod = 0;
	    dotOut();
	    }
	/* Only write genes whose accession has a protein display id. */
	sqlSafefFrag(cond_str, sizeof cond_str, "extAC='%s'", psl->qName);
	answer = sqlGetField(proteinDB, "spXref2", "displayID", cond_str);
	if (answer == NULL)
	    {
	    fprintf(stderr, "%s NOT FOUND.\n", psl->qName);
	    fflush(stderr);
	    }
	if (answer != NULL)
	    {
	    struct genePred *gp = NULL;
	    exonList = pslToExonList(psl);
	    fprintf(kgTab, "%s\t%s\t%c\t%d\t%d\t", psl->qName, psl->tName,
		psl->strand[0], psl->tStart, psl->tEnd);
	    rsi = hashMustFindVal(rsiHash, psl->qName);
	    /* Map mRNA cds coordinates onto the genome via the psl. */
	    gp = genePredFromPsl(psl, rsi->cdsStart, rsi->cdsEnd, genePredStdInsertMergeSize);
	    if (!gp)
		errAbort("Cannot convert psl (%s) to genePred.\n", psl->qName);
	    fprintf(kgTab, "%d\t%d\t", gp->cdsStart, gp->cdsEnd);
	    fprintf(kgTab, "%d\t", slCount(exonList));
	    fflush(kgTab);
	    for (exon = exonList; exon != NULL; exon = exon->next)
		fprintf(kgTab, "%d,", exon->start);
	    fprintf(kgTab, "\t");
	    for (exon = exonList; exon != NULL; exon = exon->next)
		fprintf(kgTab, "%d,", exon->end);
	    fprintf(kgTab, "\n");
	    slFreeList(&exonList);
	    }
	}
    else
	{
	fprintf(stderr, "%s found in psl, but not in .fa or .ra data files.\n", psl->qName);
	fflush(stderr);
	}
    }
if (clDots) printf("\n");
if (!clTest)
    {
    writeSeqTable(pepFile, refPepTab, FALSE, TRUE);
    writeSeqTable(faFile, refMrnaTab, FALSE, FALSE);
    }
carefulClose(&kgTab);
carefulClose(&productTab);
carefulClose(&geneTab);
carefulClose(&refLinkTab);
carefulClose(&refPepTab);
carefulClose(&refMrnaTab);
if (!clTest)
    {
    /* Bulk-load each tab file into its table. */
    printf("Loading database with %s\n", kgName);
    fflush(stdout);
    hgLoadTabFile(conn, ".", kgName, NULL);

    printf("Loading database with %s\n", "productName");
    fflush(stdout);
    hgLoadTabFile(conn, ".", "productName", NULL);

    printf("Loading database with %s\n", "geneName");
    fflush(stdout);
    hgLoadTabFile(conn, ".", "geneName", NULL);

    printf("Loading database with %s\n", "refLink");
    fflush(stdout);
    hgLoadTabFile(conn, ".", "refLink", NULL);

    printf("Loading database with %s\n", "refPep");
    fflush(stdout);
    hgLoadTabFile(conn, ".", "refPep", NULL);

    printf("Loading database with %s\n", "refMrna");
    fflush(stdout);
    hgLoadTabFile(conn, ".", "refMrna", NULL);
    }
}
static void writeBlocks(struct bbiChromUsage *usageList, struct lineFile *lf, struct asObject *as,
	int itemsPerSlot, struct bbiBoundsArray *bounds,
	int sectionCount, boolean doCompress, FILE *f,
	int resTryCount, int resScales[], int resSizes[],
	struct bbExIndexMaker *eim, int bedCount,
	bits16 fieldCount, bits32 *retMaxBlockSize)
/* Read through lf, writing it in f.  Save starting points of blocks (every itemsPerSlot)
 * to boundsArray.  Input must be sorted by chromosome then start.
 * Also accumulates zoom-level counts in resSizes and (optionally)
 * extra-index offsets in eim.  bedCount is unused in this function. */
{
int maxBlockSize = 0;
struct bbiChromUsage *usage = usageList;
char *line, *row[fieldCount+1];
int lastField = fieldCount-1;
int itemIx = 0, sectionIx = 0;
bits64 blockStartOffset = 0;
int startPos = 0, endPos = 0;
bits32 chromId = 0;
struct dyString *stream = dyStringNew(0);

/* Will keep track of some things that help us determine how much to reduce. */
bits32 resEnds[resTryCount];
int resTry;
for (resTry = 0; resTry < resTryCount; ++resTry)
    resEnds[resTry] = 0;
boolean atEnd = FALSE, sameChrom = FALSE;
bits32 start = 0, end = 0;
char *chrom = NULL;
struct bed *bed;
AllocVar(bed);

/* Help keep track of which beds are in current chunk so as to write out
 * namedChunks to eim if need be. */
long sectionStartIx = 0, sectionEndIx = 0;

for (;;)
    {
    /* Get next line of input if any. */
    if (lineFileNextReal(lf, &line))
	{
	/* Chop up line and make sure the word count is right. */
	int wordCount;
	if (tabSep)
	    wordCount = chopTabs(line, row);
	else
	    wordCount = chopLine(line, row);
	lineFileExpectWords(lf, fieldCount, wordCount);
	loadAndValidateBed(row, bedN, fieldCount, lf, bed, as, FALSE);
	chrom = bed->chrom;
	start = bed->chromStart;
	end = bed->chromEnd;
	sameChrom = sameString(chrom, usage->name);
	}
    else	/* No next line */
	{
	atEnd = TRUE;
	}

    /* Check conditions that would end block and save block info and advance to next if need be. */
    if (atEnd || !sameChrom || itemIx >= itemsPerSlot)
	{
	/* Save stream to file, compressing if need be. */
	if (stream->stringSize > maxBlockSize)
	    maxBlockSize = stream->stringSize;
	if (doCompress)
	    {
	    size_t maxCompSize = zCompBufSize(stream->stringSize);

	    // keep around an area of scratch memory
	    static int compBufSize = 0;
	    static char *compBuf = NULL;
	    // check to see if buffer needed for compression is big enough
	    if (compBufSize < maxCompSize)
		{
		// free up the old not-big-enough piece
		freez(&compBuf); // freez knows bout NULL

		// get new scratch area
		compBufSize = maxCompSize;
		compBuf = needLargeMem(compBufSize);
		}
	    int compSize = zCompress(stream->string, stream->stringSize, compBuf, maxCompSize);
	    mustWrite(f, compBuf, compSize);
	    }
	else
	    mustWrite(f, stream->string, stream->stringSize);
	dyStringClear(stream);

	/* Save block offset and size for all named chunks in this section. */
	if (eim != NULL)
	    {
	    bits64 blockEndOffset = ftell(f);
	    bbExIndexMakerAddOffsetSize(eim, blockStartOffset, blockEndOffset-blockStartOffset,
		sectionStartIx, sectionEndIx);
	    sectionStartIx = sectionEndIx;
	    }

	/* Save info on existing block. */
	struct bbiBoundsArray *b = &bounds[sectionIx];
	b->offset = blockStartOffset;
	b->range.chromIx = chromId;
	b->range.start = startPos;
	b->range.end = endPos;
	++sectionIx;
	itemIx = 0;

	if (atEnd)
	    break;
	}

    /* Advance to next chromosome if need be and get chromosome id.
     * The asserts rely on usageList covering the input in order. */
    if (!sameChrom)
	{
	usage = usage->next;
	assert(usage != NULL);
	assert(sameString(chrom, usage->name));
	/* Reset zoom accounting for the new chromosome. */
	for (resTry = 0; resTry < resTryCount; ++resTry)
	    resEnds[resTry] = 0;
	}
    chromId = usage->id;

    /* At start of block we save a lot of info. */
    if (itemIx == 0)
	{
	blockStartOffset = ftell(f);
	startPos = start;
	endPos = end;
	}
    /* Otherwise just update end. */
    /* NOTE(review): this is a bare block, not an else — it always runs.
     * Harmless because itemIx==0 just set endPos = end, but confirm the
     * missing 'else' is intentional before relying on it. */
	{
	if (endPos < end)
	    endPos = end;
	/* No need to update startPos since list is sorted. */
	}

    /* Save name into namedOffset if need be. */
    if (eim != NULL)
	{
	bbExIndexMakerAddKeysFromRow(eim, row, sectionEndIx);
	sectionEndIx += 1;
	}

    /* Write out data: fixed binary header then tab-separated extra
     * fields with a terminating NUL, matching the bigBed item format. */
    dyStringWriteOne(stream, chromId);
    dyStringWriteOne(stream, start);
    dyStringWriteOne(stream, end);
    if (fieldCount > 3)
	{
	int i;
	/* Write 3rd through next to last field and a tab separator. */
	for (i=3; i<lastField; ++i)
	    {
	    char *s = row[i];
	    dyStringAppend(stream, s);
	    dyStringAppendC(stream, '\t');
	    }
	/* Write last field and terminal zero */
	char *s = row[lastField];
	dyStringAppend(stream, s);
	}
    dyStringAppendC(stream, 0);
    itemIx += 1;

    /* Do zoom counting: bump each resolution bucket the item touches. */
    for (resTry = 0; resTry < resTryCount; ++resTry)
	{
	bits32 resEnd = resEnds[resTry];
	if (start >= resEnd)
	    {
	    resSizes[resTry] += 1;
	    resEnds[resTry] = resEnd = start + resScales[resTry];
	    }
	while (end > resEnd)
	    {
	    resSizes[resTry] += 1;
	    resEnds[resTry] = resEnd = resEnd + resScales[resTry];
	    }
	}
    }
assert(sectionIx == sectionCount);
freez(&bed);
*retMaxBlockSize = maxBlockSize;
}
void liftTabbed(char *destFile, struct hash *liftHash, int sourceCount, char *sources[],
	int ctgWord, int startWord, int endWord,
	boolean doubleLift, int ctgWord2, int startWord2, int endWord2,
	int startOffset, int strandWord)
/* Generic lift of a tab-separated file whose contig, start, and end live at the
 * given word (column) indexes.  Concatenates all sources into destFile,
 * rewriting contig names to chromosome names and offsetting coordinates via
 * liftHash.  If doubleLift is TRUE also lifts a second coordinate set
 * (ctgWord2/startWord2/endWord2).  A word index of -1 means "no such column".
 * strandWord, if >= 0, names a +/- strand column that is flipped when the lift
 * spec is on the - strand.  Depends on file-scope globals: how (policy for
 * contigs missing from liftHash), nosort (emit lines as read instead of sorted
 * by position), and dots (progress output). */
{
int minFieldCount = max3(startWord, endWord, ctgWord) + 1;
int wordCount, lineSize;
char *words[128], *line, *source;
struct lineFile *lf;
FILE *f = mustOpen(destFile, "w");
int i,j;
int start = 0;
int end = 0;
int start2 = 0;
int end2 = 0;
char *contig, *chrom = NULL, *chrom2 = NULL;
struct liftSpec *spec;
static char buf[1024*16];	/* scratch buffer for one rewritten output line */
char *s;
int len;
struct bedInfo *biList = NULL, *bi;	/* lines accumulated for sorting when !nosort */
boolean anyHits = FALSE;
if (doubleLift)
    {
    /* NOTE(review): unlike minFieldCount above, min2 lacks a "+ 1", so a line
     * with exactly max(ctgWord2,startWord2,endWord2) fields passes the width
     * check here -- looks like an off-by-one; confirm against callers. */
    int min2 = max3(ctgWord2, startWord2, endWord2);
    minFieldCount = max(minFieldCount, min2);
    }
for (i=0; i<sourceCount; ++i)
    {
    source = sources[i];
    lf = lineFileOpen(source, TRUE);
    verbose(1, "Lifting %s\n", source);
    while (lineFileNext(lf, &line, &lineSize))
	{
	if (line[0] == '#')	/* skip comment lines */
	    continue;
	wordCount = chopTabs(line, words);
	if (wordCount == 0)	/* skip blank lines */
	    continue;
	if (wordCount < minFieldCount)
	    errAbort("Expecting at least %d words line %d of %s",
		minFieldCount, lf->lineIx, lf->fileName);
	contig = words[ctgWord];
	contig = rmChromPrefix(contig);
	if (startWord >= 0)
	    start = lineFileNeedNum(lf, words, startWord);
	if (endWord >= 0)
	    end = lineFileNeedNum(lf, words, endWord);
	spec = findLift(liftHash, contig, lf);
	if (spec == NULL)
	    {
	    if (how == carryMissing)
		chrom = cloneString(contig);	/* carry line through under original contig name */
	    else
		continue;	/* other policies: silently drop unmapped lines */
	    }
	else
	    {
	    chrom = spec->newName;
	    if (spec->strand == '-')
		{
		/* Mirror the interval through the contig and flip any strand column. */
		int s = start - startOffset, e = end;
		start = spec->oldSize - e + startOffset;
		end = spec->oldSize - s;
		if (strandWord >= 0 && strandWord < wordCount)
		    {
		    char strand = words[strandWord][0];
		    if (strand == '+')
			words[strandWord] = "-";
		    else if (strand == '-')
			words[strandWord] = "+";
		    }
		}
	    start += spec->offset;
	    end += spec->offset;
	    }
	if (doubleLift)
	    {
	    /* Lift the second coordinate set.  Reverse-strand specs are not
	     * supported for it, and a missing contig is fatal unless carrying. */
	    contig = words[ctgWord2];
	    start2 = lineFileNeedNum(lf, words, startWord2);
	    end2 = lineFileNeedNum(lf, words, endWord2);
	    spec = findLift(liftHash, contig, lf);
	    if (spec == NULL)
		{
		if (how == carryMissing)
		    chrom2 = cloneString(contig);
		else
		    errAbort("Couldn't find second contig in lift file at line %d of %s\n",
			lf->lineIx, lf->fileName);
		}
	    else
		{
		cantHandleSpecRevStrand(spec);
		chrom2 = spec->newName;
		start2 += spec->offset;
		end2 += spec->offset;
		}
	    }
	anyHits = TRUE;
	/* Rebuild the line into buf, substituting lifted values in place.
	 * NOTE(review): the headroom check reserves 128 bytes per field; a
	 * single field longer than that could still overrun buf -- confirm
	 * inputs are bounded. */
	s = buf;
	for (j=0; j<wordCount; ++j)
	    {
	    if (s + 128 >= buf + sizeof(buf))
		errAbort("Line %d too long in %s", lf->lineIx, lf->fileName);
	    if (j != 0)
		*s++ = '\t';
	    if (j == ctgWord)
		s += sprintf(s, "%s", chrom);
	    else if (j == startWord)
		s += sprintf(s, "%d", start);
	    else if (j == endWord)
		s += sprintf(s, "%d", end);
	    else if (doubleLift && j == ctgWord2)
		s += sprintf(s, "%s", chrom2);
	    else if (doubleLift && j == startWord2)
		s += sprintf(s, "%d", start2);
	    else if (doubleLift && j == endWord2)
		s += sprintf(s, "%d", end2);
	    else
		s += sprintf(s, "%s", words[j]);
	    }
	*s = 0;
	if (nosort)
	    {
	    fprintf(f, "%s\n", buf);
	    }
	else
	    {
	    /* Stash line plus its position for sorting after all input is read;
	     * bedInfo uses a flexible trailing array for the line text. */
	    len = s-buf;
	    bi = needMem(sizeof(*bi) + len);
	    bi->chrom = chrom;
	    bi->start = start;
	    bi->end = end;
	    memcpy(bi->line, buf, len);
	    slAddHead(&biList, bi);
	    }
	}
    lineFileClose(&lf);
    if (dots)
	verbose(1, "\n");
    }
if (!nosort)
    {
    /* Sort accumulated lines by position and emit them. */
    slSort(&biList, bedInfoCmp);
    for (bi = biList; bi != NULL; bi = bi->next)
	{
	fprintf(f, "%s\n", bi->line);
	}
    }
if (ferror(f))
    errAbort("error writing %s", destFile);
fclose(f);
if (!anyHits)
    errAbort("No lines lifted!");
}
void loadOneBed(struct lineFile *lf, int bedSize, struct bedStub **pList) /* Load one bed file. Make sure all lines have the correct number of fields. * Put results in *pList. */ { char *words[64], *line, *dupe; int wordCount; struct bedStub *bed; struct asObject *asObj = getAsObj(bedSize); int fieldCount = getFieldCount(bedSize, asObj); struct bed *validateBed; AllocVar(validateBed); verbose(1, "Reading %s\n", lf->fileName); while (lineFileNextReal(lf, &line)) { if (hasBin) nextWord(&line); dupe = cloneString(line); if (strictTab) wordCount = chopTabs(line, words); else wordCount = chopLine(line, words); /* ignore empty lines */ if (0 == wordCount) continue; lineFileExpectWords(lf, fieldCount, wordCount); if (type) // TODO also, may need to add a flag to the validateBed() interface to support -allowNegativeScores when not isCt // although can probably get away without it since usually -allowNegativeScores is used by ct which has already verified it. // thus -allowNegativeScores is unlikely to be used with -type. { loadAndValidateBed(words, typeBedN, fieldCount, lf, validateBed, asObj, FALSE); checkChromNameAndSize(lf, validateBed->chrom, validateBed->chromEnd); } AllocVar(bed); bed->chrom = cloneString(words[0]); bed->chromStart = lineFileNeedNum(lf, words, 1); bed->chromEnd = lineFileNeedNum(lf, words, 2); if (! noStrict) { if ((bed->chromEnd < 1) && !allowStartEqualEnd) errAbort("ERROR: line %d:'%s'\nchromEnd is less than 1\n", lf->lineIx, dupe); if (bed->chromStart == bed->chromEnd && !allowStartEqualEnd) errAbort("ERROR: line %d:'%s'\nchromStart == chromEnd (%d) (zero-length item)\n" "Use -allowStartEqualEnd if that is legit (e.g. for insertion point).\n", lf->lineIx, dupe, bed->chromStart); if (bed->chromStart > bed->chromEnd) errAbort("ERROR: line %d:'%s'\nchromStart after chromEnd (%d > %d)\n", lf->lineIx, dupe, bed->chromStart, bed->chromEnd); } bed->line = dupe; slAddHead(pList, bed); } if (asObj) asObjectFreeList(&asObj); freez(&validateBed); }
void writeBedTab(char *fileName, struct bedStub *bedList) /* Write out bed list to tab-separated file. */ { struct bedStub *bed; FILE *f = mustOpen(fileName, "w"); char *words[64]; int i, wordCount; for (bed = bedList; bed != NULL; bed = bed->next) { if (!noBin) { // allow for zero-length at start of seq [bin code can't handle 0-0] unsigned end = (bed->chromEnd > 0) ? bed->chromEnd : 1; if (fprintf(f, "%u\t", hFindBin(bed->chromStart, end)) <= 0) writeFailed(fileName); } if (strictTab) wordCount = chopTabs(bed->line, words); else wordCount = chopLine(bed->line, words); for (i=0; i<wordCount; ++i) { /* new definition for old "reserved" field, now itemRgb */ /* and when itemRgb, it is a comma separated string r,g,b */ if (itemRgb && (i == 8)) { char *comma; /* Allow comma separated list of rgb values here */ comma = strchr(words[8], ','); if (comma) { int itemRgb = 0; if (-1 == (itemRgb = bedParseRgb(words[8]))) errAbort("ERROR: expecting r,g,b specification, " "found: '%s'", words[8]); else if (fprintf(f, "%d", itemRgb) <= 0) writeFailed(fileName); verbose(2, "itemRgb: %s, rgb: %#x\n", words[8], itemRgb); } else if (fputs(words[i], f) == EOF) writeFailed(fileName); } else if ((dotIsNull > 0) && (dotIsNull == i) && sameString(words[i],".")) /* If the . was used to represent NULL, replace with -1 in the tables */ { if (fputs("-1", f) == EOF) writeFailed(fileName); } else if (fputs(words[i], f) == EOF) writeFailed(fileName); if (i == wordCount-1) { if (fputc('\n', f) == EOF) writeFailed(fileName); } else if (fputc('\t', f) == EOF) writeFailed(fileName); } } fclose(f); }
struct tagStorm *idfToStormTop(char *fileName) /* Convert an idf.txt format file to a tagStorm with a single top-level stanza */ { /* Create a tag storm with one as yet empty stanza */ struct tagStorm *storm = tagStormNew(fileName); struct tagStanza *stanza = tagStanzaNew(storm, NULL); /* Some stuff to help turn File_Data1, File_Data2, etc to a comma separated list */ char *additionalFilePrefix = "idf.Comment_AdditionalFile_Data"; struct dyString *additionalFileDy = dyStringNew(0); /* There can be multiple secondary accession tags, so handle these too */ char *secondaryAccessionTag = "idf.Comment_SecondaryAccession"; struct dyString *secondaryAccessionDy = dyStringNew(0); /* Parse lines from idf file into stanza */ struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line; struct dyString *dyVal = dyStringNew(0); while (lineFileNextReal(lf, &line)) { /* Erase trailing tab... */ eraseTrailingSpaces(line); /* Parse line into tab-separated array and make sure it's a reasonable size */ char *row[256]; int rowSize = chopTabs(line, row); if (rowSize == ArraySize(row)) errAbort("Line %d of %s has too many fields", lf->lineIx, lf->fileName); if (rowSize < 2) continue; /* Convert first element to tagName */ char tagName[256]; aeFieldToNormalField("idf.", trimSpaces(row[0]), tagName, sizeof(tagName)); /* Special case where we already are a comma separated list */ if (sameString(tagName, "idf.Publication_Author_List")) { tagStanzaAppend(storm, stanza, tagName, row[1]); } else if (startsWith(additionalFilePrefix, tagName)) { csvEscapeAndAppend(additionalFileDy, row[1]); } else if (sameString(secondaryAccessionTag, tagName)) { csvEscapeAndAppend(secondaryAccessionDy, row[1]); } else { /* Convert rest of elements to possibly comma separated values */ dyStringClear(dyVal); int i; for (i=1; i<rowSize; ++i) csvEscapeAndAppend(dyVal, row[i]); tagStanzaAppend(storm, stanza, tagName, dyVal->string); } } if (additionalFileDy->stringSize != 0) tagStanzaAppend(storm, stanza, 
additionalFilePrefix, additionalFileDy->string); if (secondaryAccessionDy->stringSize != 0) tagStanzaAppend(storm, stanza, secondaryAccessionTag, secondaryAccessionDy->string); dyStringFree(&secondaryAccessionDy); dyStringFree(&additionalFileDy); dyStringFree(&dyVal); lineFileClose(&lf); return storm; }