struct mapPos *readInfoFile(char *mapName)
/* Load map positions from the file mapName.  The first line of the file is
 * required and is consumed without being parsed.  Each following line that
 * does not begin with '#' must contain exactly three words: clone name,
 * position and phase.  Returns the list in file order. */
{
struct lineFile *lf = lineFileOpen(mapName, TRUE);
struct mapPos *mapList = NULL;
char *words[16];
char *text;
int textSize;

/* Consume the mandatory first line; its content is not examined. */
lineFileNeedNext(lf, &text, &textSize);

while (lineFileNext(lf, &text, &textSize))
    {
    struct mapPos *pos;
    int fieldCount;

    if (text[0] == '#')
        continue;
    fieldCount = chopLine(text, words);
    lineFileExpectWords(lf, 3, fieldCount);

    AllocVar(pos);
    pos->cloneName = cloneString(words[0]);
    pos->pos = atoi(words[1]);
    pos->phase = atoi(words[2]);
    slAddHead(&mapList, pos);
    }
lineFileClose(&lf);

/* Elements were pushed on the head, so flip the list before returning. */
slReverse(&mapList);
return mapList;
}
void makeMotifs(char *inFile, struct hash *tfHash, char *outFile) /* Parse input motifs and save them to outFile in dnaMotif format. */ { struct lineFile *lf = lineFileOpen(inFile, TRUE); FILE *f = mustOpen(outFile, "w"); struct hashEl *hel; for (;;) { char *line; char *words[256], *word; int wordCount; struct dnaMotif *motif; if (!lineFileSkipTo(lf, "Probability matrix for")) break; lineFileNeedNext(lf, &line, NULL); wordCount = chopLine(line, words); if (wordCount >= ArraySize(words)) errAbort("Line %d of %s is too long\n", lf->lineIx, lf->fileName); if (!sameString(words[0], "#")) badFormat(lf); AllocVar(motif); motif->columnCount = wordCount-1; readBaseProbs(lf, words, "#A", &motif->aProb, motif->columnCount); readBaseProbs(lf, words, "#C", &motif->cProb, motif->columnCount); readBaseProbs(lf, words, "#T", &motif->tProb, motif->columnCount); readBaseProbs(lf, words, "#G", &motif->gProb, motif->columnCount); if (!lineFileSkipTo(lf, "Source:")) lineFileUnexpectedEnd(lf); lineFileReuse(lf); lineFileNeedNext(lf, &line, NULL); word = nextWord(&line); word = nextWord(&line); if (word == NULL) errAbort("Short Source: line %d of %s", lf->lineIx, lf->fileName); motif->name = cloneString(word); hel = hashLookup(tfHash, motif->name); if (hel == NULL) errAbort("%s in %s but not GFFs", motif->name, lf->fileName); hel->val = motif; dnaMotifTabOut(motif, f); } carefulClose(&f); lineFileClose(&lf); }
void checkInputOpenFiles(struct inInfo *array, int count) /* Make sure all of the input is there and of right format before going forward. Since * this is going to take a while we want to fail fast. */ { int i; for (i=0; i<count; ++i) { struct inInfo *in = &array[i]; switch (in->type) { case itBigWig: { /* Just open and close, it will abort if any problem. */ in->bbi = bigWigFileOpen(in->fileName); break; } case itPromoterBed: case itUnstrandedBed: case itBlockedBed: { struct lineFile *lf = in->lf = lineFileOpen(in->fileName, TRUE); char *line; lineFileNeedNext(lf, &line, NULL); char *dupe = cloneString(line); char *row[256]; int wordCount = chopLine(dupe, row); struct bed *bed = NULL; switch (in->type) { case itPromoterBed: lineFileExpectAtLeast(lf, 6, wordCount); bed = bedLoadN(row, 6); char strand = bed->strand[0]; if (strand != '+' && strand != '-') errAbort("%s must be stranded, got %s in that field", lf->fileName, row[6]); break; case itUnstrandedBed: lineFileExpectAtLeast(lf, 4, wordCount); bed = bedLoadN(row, 4); break; case itBlockedBed: lineFileExpectAtLeast(lf, 4, wordCount); bed = bedLoadN(row, 12); break; default: internalErr(); break; } bedFree(&bed); freez(&dupe); lineFileReuse(lf); break; } default: internalErr(); break; } } }
int countWcDiff(char *fileName)
/** Return the integer that starts the first line of fileName (presumably the
 * line count from saved 'wc' output -- confirm against the caller).
 * Aborts if the file has no first line.  Returns 0 when that line is blank
 * or does not begin with a number. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line;
char *row[3];
int wordCount;
int numLineDiff = 0;

lineFileNeedNext(lf, &line, NULL);
line = trimSpaces(line);
wordCount = chopString(line, " ", row, 3);
/* A blank line produces no words; row[0] is only valid when at least one
 * word was found. */
if (wordCount > 0)
    numLineDiff = atoi(row[0]);
lineFileClose(&lf);
return numLineDiff;
}
void rmskOut2OpenVerify(char *fileName, struct lineFile **retFile, boolean *retEmpty)
/* Open repeat masker .out file and verify that it is good.
 *   fileName - path of the .out file.
 *   retFile  - receives the open lineFile in both the empty and non-empty
 *              case; the caller is responsible for closing it.
 *   retEmpty - set to TRUE if the first line starts with "There were no"
 *              (header characteristic of an empty file), FALSE otherwise.
 * For a non-empty file the first line must start (after leading spaces)
 * with "SW" or "bit", otherwise this aborts; the next two lines are then
 * skipped so the file is left positioned after them. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line;

lineFileNeedNext(lf, &line, NULL);
if (startsWith("There were no", line))
    {
    /* Empty result file: there is no header to verify or skip. */
    *retEmpty = TRUE;
    }
else
    {
    line = skipLeadingSpaces(line);
    if (! ( startsWith("SW", line) || startsWith("bit", line) ) )
        errAbort("%s doesn't seem to be a RepeatMasker .out file", fileName);
    lineFileSkip(lf, 2);
    *retEmpty = FALSE;
    }
*retFile = lf;
}
void readBaseProbs(struct lineFile *lf, char **words, char *firstWord,
	float **pArray, int colCount)
/* Read the next line of lf as one row of base probabilities.
 *   lf        - input; a next line is required.
 *   words     - caller-supplied scratch array with room for colCount+1 words.
 *   firstWord - label the row must begin with (for example "#A").
 *   pArray    - receives a newly allocated array of colCount floats.
 *   colCount  - number of probability columns expected after the label.
 * Aborts if the word count or the label does not match. */
{
char *text;
int expected = colCount + 1;
int found;
float *probs;
int col;

lineFileNeedNext(lf, &text, NULL);
/* NOTE(review): the chop is limited to colCount+1 words, so a line with
 * extra columns may go undetected by the count check below -- confirm. */
found = chopByWhite(text, words, expected);
lineFileExpectWords(lf, expected, found);
if (!sameString(words[0], firstWord))
    errAbort("Expecting %s, got %s line %d of %s", firstWord, words[0],
             lf->lineIx, lf->fileName);

AllocArray(probs, colCount);
for (col = 1; col <= colCount; ++col)
    probs[col-1] = atof(words[col]);
*pArray = probs;
}
struct segFile *segMayOpen(char *fileName)
/* Open a segment file for reading and verify its header so that segNext()
 * can be called afterwards.  Returns NULL if the file cannot be opened.
 * Aborts if the first line does not start with "##seg", if a header word
 * lacks '=', or if the version ends up as 0. */
{
char *sig = "##seg";
struct lineFile *lf = lineFileMayOpen(fileName, TRUE);
struct segFile *sf;
char *line, *word;

if (lf == NULL)
    return NULL;

AllocVar(sf);
sf->lf = lf;

/* The signature must open the first line. */
lineFileNeedNext(lf, &line, NULL);
if (!startsWith(sig, line))
    errAbort("%s does not start with %s", fileName, sig);
line += strlen(sig);

/* The rest of the line is a series of name=val words. */
for (word = nextWord(&line); word != NULL; word = nextWord(&line))
    {
    char *eq = strchr(word, '=');
    if (eq == NULL)
        errAbort("Missing = after %s line 1 of %s", word, fileName);
    *eq = 0;	/* Split the word in place: name before, value after. */
    if (sameString(word, "version"))
        sf->version = atoi(eq + 1);
    }

if (sf->version == 0)
    errAbort("No version line 1 of %s", fileName);
return sf;
}
struct mafFile *mafMayOpen(char *fileName)
/* Open a maf file and verify its header.  Returns NULL if the file cannot
 * be opened.  Aborts if the first line does not start with "##maf", if a
 * header word lacks '=', or if the version ends up as 0.  The "version" and
 * "scoring" header settings are stored in the returned mafFile. */
{
char *sig = "##maf";
struct lineFile *lf = lineFileMayOpen(fileName, TRUE);
struct mafFile *mf;
char *line, *word;

if (lf == NULL)
    return NULL;

AllocVar(mf);
mf->lf = lf;

/* The signature must open the first line. */
lineFileNeedNext(lf, &line, NULL);
if (!startsWith(sig, line))
    errAbort("%s does not start with %s", fileName, sig);
line += strlen(sig);

/* The rest of the line is a series of name=val words. */
for (word = nextWord(&line); word != NULL; word = nextWord(&line))
    {
    char *eq = strchr(word, '=');
    char *val;
    if (eq == NULL)
        errAbort("Missing = after %s line 1 of %s\n", word, fileName);
    *eq = 0;	/* Split the word in place: name before, value after. */
    val = eq + 1;
    if (sameString(word, "version"))
        mf->version = atoi(val);
    else if (sameString(word, "scoring"))
        mf->scoring = cloneString(val);
    }

if (mf->version == 0)
    errAbort("No version line 1 of %s\n", fileName);
return mf;
}
void hgLoadRnaFold(char *database, char *table, char *foldDir) /* hgLoadRnaFold - Load a directory full of RNA fold files into database. */ { char path[PATH_LEN]; struct slName *dirList, *dirEl; struct lineFile *lf; char *line, *word, *s, c; FILE *f = hgCreateTabFile(tabDir, table); int count = 0; dirList = listDir(foldDir, "*"); for (dirEl = dirList; dirEl != NULL; dirEl = dirEl->next) { char *name = dirEl->name; if (sameString(name, "CVS")) continue; safef(path, sizeof(path), "%s/%s", foldDir, name); lf = lineFileOpen(path, TRUE); if (!lineFileNext(lf, &line, NULL)) { if (warnEmpty) { warn("%s is empty, skipping\n", name); lineFileClose(&lf); continue; } else errAbort("%s is empty\n", name); } if (!isupper(line[0])) notFold(path, 1); fprintf(f, "%s\t", name); /* Save name */ fprintf(f, "%s\t", line); /* Save sequence */ lineFileNeedNext(lf, &line, NULL); c = line[0]; if (c != '.' && c != '(') notFold(path, 2); word = nextWord(&line); fprintf(f, "%s\t", word); /* Save nested parenthesis */ /* Parse out (energy) term at end of line. */ s = strchr(line, '('); if (s == NULL) notFold(path, 3); word = skipLeadingSpaces(s+1); if (word == NULL || (!word[0] == '-' && !isdigit(word[0]))) notFold(path, 4); if ((s = strchr(word, ')')) == NULL) notFold(path, 5); *s = 0; fprintf(f, "%s\n", word); lineFileClose(&lf); ++count; } printf("Parsed %d files\n", count); if (doLoad) { struct sqlConnection *conn = sqlConnect(database); rnaFoldCreateTable(conn, table); hgLoadTabFile(conn, tabDir, table, &f); hgRemoveTabFile(tabDir, table); sqlDisconnect(&conn); } }