struct mapPos *readInfoFile(char *mapName)
/* Load map position records from mapName.  The first line of the file is
 * consumed before parsing starts, lines beginning with '#' are skipped, and
 * each remaining line is checked with lineFileExpectWords for three words:
 * clone name, position and phase.  Returns the records in file order. */
{
struct lineFile *mapFile = lineFileOpen(mapName, TRUE);
struct mapPos *posList = NULL;
char *text;
int textSize;

/* Throw away the leading line of the file. */
lineFileNeedNext(mapFile, &text, &textSize);
for (;;)
    {
    char *fields[16];
    struct mapPos *pos;
    int fieldCount;

    if (!lineFileNext(mapFile, &text, &textSize))
        break;
    if (text[0] == '#')
        continue;
    fieldCount = chopLine(text, fields);
    lineFileExpectWords(mapFile, 3, fieldCount);
    AllocVar(pos);
    pos->cloneName = cloneString(fields[0]);
    pos->pos = atoi(fields[1]);
    pos->phase = atoi(fields[2]);
    slAddHead(&posList, pos);
    }
lineFileClose(&mapFile);
/* Records were pushed on the head, so flip the list back into file order. */
slReverse(&posList);
return posList;
}
static void parseColumnHeaderRow(struct vcfFile *vcff, char *line) /* Make sure column names are as we expect, and store genotype sample IDs if any are given. */ { if (line[0] != '#') { vcfFileErr(vcff, "Expected to find # followed by column names (\"#CHROM POS ...\"), " "not \"%s\"", line); lineFileReuse(vcff->lf); return; } char *words[VCF_MAX_COLUMNS]; int wordCount = chopLine(line+1, words); if (wordCount >= VCF_MAX_COLUMNS) vcfFileErr(vcff, "header contains at least %d columns; " "VCF_MAX_COLUMNS may need to be increased in vcf.c!", VCF_MAX_COLUMNS); expectColumnName(vcff, "CHROM", words, 0); expectColumnName(vcff, "POS", words, 1); expectColumnName(vcff, "ID", words, 2); expectColumnName(vcff, "REF", words, 3); expectColumnName(vcff, "ALT", words, 4); expectColumnName2(vcff, "QUAL", "PROB", words, 5); expectColumnName(vcff, "FILTER", words, 6); expectColumnName(vcff, "INFO", words, 7); if (wordCount > 8) { expectColumnName(vcff, "FORMAT", words, 8); if (wordCount < 10) vcfFileErr(vcff, "FORMAT column is given, but no sample IDs for genotype columns...?"); vcff->genotypeCount = (wordCount - 9); vcff->genotypeIds = vcfFileAlloc(vcff, vcff->genotypeCount * sizeof(char *)); int i; for (i = 9; i < wordCount; i++) vcff->genotypeIds[i-9] = vcfFileCloneStr(vcff, words[i]); } }
struct blastFile *blastFileOpenVerify(char *fileName)
/* Open fileName as a BLAST output file and check its first line.
 * The header needs at least three words, which are stored as program,
 * version and build date.  bfBadHeader is called when there are too few
 * words, the program name does not match "*BLAST*", the version does not
 * start with a digit, or the build date does not start with '['. */
{
struct blastFile *blast;
char *header;
char *fields[16];
int fieldCount;

AllocVar(blast);
blast->lf = lineFileOpen(fileName, TRUE);
blast->fileName = cloneString(fileName);

/* Split the first line into program / version / build date. */
header = bfNeedNextLine(blast);
fieldCount = chopLine(header, fields);
if (fieldCount < 3)
    bfBadHeader(blast);
blast->program = cloneString(fields[0]);
blast->version = cloneString(fields[1]);
blast->buildDate = cloneString(fields[2]);

/* Sanity-check each of the three header fields in turn. */
if (!wildMatch("*BLAST*", blast->program))
    bfBadHeader(blast);
if (!isdigit(blast->version[0]))
    bfBadHeader(blast);
if (blast->buildDate[0] != '[')
    bfBadHeader(blast);
return blast;
}
static struct gfRange *gfQuerySeq(int conn, struct dnaSeq *seq) /* Ask server for places sequence hits. */ { struct gfRange *rangeList = NULL, *range; char buf[256], *row[6]; int rowSize; startSeqQuery(conn, seq, "query"); /* Read results line by line and save in list, and return. */ for (;;) { netRecieveString(conn, buf); if (sameString(buf, "end")) { break; } else if (startsWith("Error:", buf)) { gfServerWarn(seq, buf); break; } else { rowSize = chopLine(buf, row); if (rowSize < 6) errAbort("Expecting 6 words from server got %d", rowSize); range = gfRangeLoad(row); slAddHead(&rangeList, range); } } slReverse(&rangeList); return rangeList; }
boolean wormGeneForOrf(char *orfName, char *geneNameBuf, int bufSize) /* Look for gene type (unc-12 or something) synonym for cosmid.N name. */ { FILE *f; char fileName[512]; char lineBuf[512]; int nameLen = strlen(orfName); boolean ok = FALSE; sprintf(fileName, "%sorf2gene", wormFeaturesDir()); f = mustOpen(fileName, "r"); while (fgets(lineBuf, sizeof(lineBuf), f)) { if (strncmp(lineBuf, orfName, nameLen) == 0 && lineBuf[nameLen] == ' ') { char *words[2]; int wordCount; wordCount = chopLine(lineBuf, words); assert((int)strlen(words[1]) < bufSize); strncpy(geneNameBuf, words[1], bufSize); ok = TRUE; break; } } fclose(f); return ok; }
int findBedSize(char *fileName, struct lineFile **retLf) /* Read first line of file and figure out how many words in it. */ /* Input file could be stdin, in which case we really don't want to open, * read, and close it here. So if retLf is non-NULL, return the open * linefile (having told it to reuse the line we just read). */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); char *words[64], *line; int wordCount; if (!lineFileNextReal(lf, &line)) if (ignoreEmpty) return(0); line = cloneString(line); if (strictTab) wordCount = chopTabs(line, words); else wordCount = chopLine(line, words); if (wordCount == 0) errAbort("%s appears to be empty", fileName); if (retLf != NULL) { lineFileReuse(lf); *retLf = lf; } else lineFileClose(&lf); freeMem(line); return wordCount; }
struct lump *readLumps(char *fileName)
/* Read in lumps from file.  A line that does not start with white space
 * begins a new lump whose seq is the first word.  A line that does start
 * with white space must hold two words, the first beginning with a digit;
 * that number is added to the count of the most recent lump.  Blank lines
 * are allowed.  Returns the lumps in file order. */
{
struct lump *lumpList = NULL, *lump = NULL;
char line[1024];
int lineCount = 0;      /* 1-based line number, used only in error messages. */
char *words[3];
int wordCount;
boolean isIndented;
FILE *f = mustOpen(fileName, "r");

while (fgets(line, sizeof(line), f))
    {
    ++lineCount;
    /* Record indentation before chopLine overwrites white space. */
    isIndented = isspace((unsigned char)line[0]);
    wordCount = chopLine(line, words);
    if (wordCount == 0)
        continue;       /* Allow blank lines. */
    if (isIndented)
        {
        if (wordCount != 2 || !isdigit((unsigned char)words[0][0]))
            errAbort("Bad line %d of %s\n", lineCount, fileName);
        /* A count line needs a lump to belong to. */
        if (lump == NULL)
            errAbort("Indented line %d of %s comes before any lump\n",
                     lineCount, fileName);
        lump->count += atoi(words[0]);
        }
    else
        {
        AllocVar(lump);
        lump->seq = cloneString(words[0]);
        slAddHead(&lumpList, lump);
        }
    }
fclose(f);
slReverse(&lumpList);
return lumpList;
}
struct pgo *readC2g(char *fileName)
/* Read a C2g file into memory.  Each non-blank line must have exactly
 * three words: a chromosome range (parsed by wormParseChromRange), a
 * strand (only its first character is kept) and a gene name.  Returns the
 * records in file order; aborts on a malformed line. */
{
FILE *f = mustOpen(fileName, "r");
struct pgo *list = NULL, *el;
char lineBuf[128];
char *words[4];
int wordCount;
int lineCount = 0;

while (fgets(lineBuf, sizeof(lineBuf), f) != NULL)
    {
    ++lineCount;
    wordCount = chopLine(lineBuf, words);
    if (wordCount == 0)
        continue;       /* Ignore blank lines. */
    if (wordCount != 3)
        {
        errAbort("Strange line starting with %s line %d of %s",
                 words[0], lineCount, fileName);
        }
    AllocVar(el);
    if (!wormParseChromRange(words[0], &el->chrom, &el->start, &el->end))
        errAbort("Bad chromosome range line %d of %s", lineCount, fileName);
    el->strand = words[1][0];
    el->gene = cloneString(words[2]);
    slAddHead(&list, el);
    }
/* Everything needed has been copied into the list; release the handle. */
fclose(f);
slReverse(&list);
return list;
}
char *findEnsTrans(struct lineFile *lf, char *line) /* Find transcript name out of ensemble line. Squawk and die * if a problem. */ { char *words[32]; int wordCount, i; char *pat = "Translation:"; int patSize = strlen(pat); wordCount = chopLine(line+1, words); for (i=0; i<wordCount; ++i) { if (startsWith(pat, words[i])) return words[i] + patSize; } // Ensembl appears to have changed their format recently; handle both formats. wordCount = chopString(line+1, "|", words, ArraySize(words)); if (wordCount >= 3) { char *ptr = strchr(words[2], '.'); if (ptr != NULL) *ptr = 0; return(words[2]); } errAbort("Couldn't find '%s' key for transcript name line %d of %s", pat, lf->lineIx, lf->fileName); return NULL; }
int main(int argc, char *argv[]) { char *inName, *name; int chunkSize = 4048*1024; FILE *in; int accSize = 0; int newAccSize; int oneSize; char line[512]; int lineCount; char *words[16]; int wordCount; struct slName *bacs = NULL, *bn; char *dirName; char *outDir; if (argc != 4) usage(); inName = argv[1]; dirName = argv[2]; outDir = argv[3]; in = mustOpen(inName, "r"); while (fgets(line, sizeof(line), in)) { char *sizeString; ++lineCount; wordCount = chopLine(line, words); if (wordCount == 0) continue; if (wordCount != 9) errAbort("Line %d of %s doesn't look like an ls -l line", lineCount, inName); sizeString = words[4]; if (!isdigit(sizeString[0])) errAbort("Line %d of %s doesn't look like an ls - l line", lineCount, inName); name = words[8]; oneSize = atoi(sizeString); newAccSize = accSize + oneSize; if (newAccSize > chunkSize) { finishJob(&bacs, accSize); accSize = oneSize; if (oneSize > chunkSize) warn("Size %d of %s exceed chunk size %d", oneSize, name, chunkSize); } else { accSize = newAccSize; } bn = newSlName(name); slAddHead(&bacs, bn); } if (bacs != NULL) finishJob(&bacs, accSize); printf("%d total jobs\n", jobCount); writeInLists(outDir, dirName); //writeJobs("job", "in", startMachine, stopMachine, "cc"); }
struct consWiggle *wigMafWiggles(char *db, struct trackDb *tdb) /* get conservation wiggle table names and labels from trackDb setting, ignoring those where table doesn't exist */ { char *fields[20]; int fieldCt; int i; char *wigTable, *wigLabel; struct consWiggle *wig, *wigList = NULL; char *setting = trackDbSetting(tdb, CONS_WIGGLE); if (!setting) return NULL; fieldCt = chopLine(cloneString(setting), fields); for (i = 0; i < fieldCt; i += 3) { wigTable = fields[i]; if (hTableExists(db, wigTable)); { AllocVar(wig); wig->table = cloneString(wigTable); wigLabel = (i+1 == fieldCt ? DEFAULT_CONS_LABEL : fields[i+1]); wig->leftLabel = cloneString(wigLabel); wigLabel = (i+2 >= fieldCt ? wig->leftLabel : fields[i+2]); wig->uiLabel = cloneString(wigLabel); slAddTail(&wigList, wig); } } return wigList; }
void getSizes(char *fileName, int *retU, int *retN)
/* Total up the sizes in a gold file.  Every line needs at least eight
 * words; its size is word 3 minus (word 2 - 1).  Lines whose fifth word
 * starts with 'N' or 'U' are summed into *retN, all others into *retU. */
{
struct lineFile *gold = lineFileOpen(fileName, TRUE);
int sumOther = 0, sumNU = 0;
char *text;
int textSize;

while (lineFileNext(gold, &text, &textSize))
    {
    char *cols[16];
    int colCount = chopLine(text, cols);
    int start, end, size;
    char kind;

    if (colCount < 8)
        errAbort("Short line %d of %s\n", gold->lineIx, gold->fileName);
    start = atoi(cols[1]) - 1;
    end = atoi(cols[2]);
    size = end-start;
    kind = cols[4][0];
    if (kind != 'N' && kind != 'U')
        sumOther += size;
    else
        sumNU += size;
    }
lineFileClose(&gold);
*retU = sumOther;
*retN = sumNU;
}
static void getXrefInfo(struct sqlConnection *conn, char **retXrefTable, char **retIdField, char **retAliasField) /* See if curTrack specifies an xref/alias table for lookup of IDs. */ { char *xrefSpec = curTrack ? trackDbSetting(curTrack, "idXref") : NULL; char *xrefTable = NULL, *idField = NULL, *aliasField = NULL; if (xrefSpec != NULL) { char *words[3]; chopLine(cloneString(xrefSpec), words); if (isEmpty(words[2])) errAbort("trackDb error: track %s, setting idXref must be followed " "by three words (xrefTable, idField, aliasField).", curTrack->track); xrefTable = words[0]; idField = words[1]; aliasField = words[2]; if (!sqlTableExists(conn, xrefTable) || sqlFieldIndex(conn, xrefTable, idField) < 0 || sqlFieldIndex(conn, xrefTable, aliasField) < 0) xrefTable = idField = aliasField = NULL; } if (retXrefTable != NULL) *retXrefTable = xrefTable; if (retIdField != NULL) *retIdField = idField; if (retAliasField != NULL) *retAliasField = aliasField; }
void rt1dFind(char *tabFile, char *treeFile, char *chrom, bits32 start, bits32 end)
/* rt1dFind - find items in 1-D range tree.
 * Asks the tree in treeFile for the file blocks that may overlap
 * chrom:start-end, then reads the lines of tabFile covered by each block
 * and prints to stdout those three-word lines (chrom, start, end) that
 * are on chrom and intersect the query range. */
{
struct lineFile *lf = lineFileOpen(tabFile, TRUE);
struct crTreeFile *crf = crTreeFileOpen(treeFile);
struct fileOffsetSize *block, *blockList = crTreeFindOverlappingBlocks(crf, chrom, start, end);

verbose(2, "Got %d overlapping blocks\n", slCount(blockList));
for (block = blockList; block != NULL; block = block->next)
    {
    verbose(2, "block->offset %llu, block->size %llu\n", block->offset, block->size);
    lineFileSeek(lf, block->offset, SEEK_SET);
    bits64 sizeUsed = 0;
    /* Consume lines until the block's byte count has been covered. */
    while (sizeUsed < block->size)
        {
        char *line;
        int size;
        if (!lineFileNext(lf, &line, &size))
            errAbort("Couldn't read %s\n", lf->fileName);
        /* Parse a copy so the original line can be printed unmodified. */
        char *parsedLine = cloneString(line);
        char *row[3];
        if (chopLine(parsedLine, row) != ArraySize(row))
            errAbort("Badly formatted line of %s\n%s", lf->fileName, line);
        char *bedChrom = row[0];
        bits32 bedStart = sqlUnsigned(row[1]);
        bits32 bedEnd = sqlUnsigned(row[2]);
        if (sameString(bedChrom, chrom) && rangeIntersection(bedStart, bedEnd, start, end) > 0)
            fprintf(stdout, "%s\n", line);
        freeMem(parsedLine);
        sizeUsed += size;
        }
    }
/* Release everything opened or allocated above. */
slFreeList(&blockList);
crTreeFileClose(&crf);
lineFileClose(&lf);
}
int main(int argc, char *argv[])
/* Filter stdin to stdout, keeping only the first line seen for each
 * distinct value of word number wordIx (1-based, given as argv[1]).
 * Blank lines, lines whose first word starts with '#', and lines with
 * fewer than wordIx words are dropped. */
{
FILE *in = stdin;
FILE *out = stdout;
char origLine[1024];
char line[1024];
char *words[256];
int wordCount;
struct hash *hash;
int wordIx;
char *word;

if (argc != 2 || !isdigit((unsigned char)argv[1][0]))
    errAbort("Usage: %s wordIx", argv[0]);
wordIx = atoi(argv[1]);
/* Word numbers are 1-based; 0 would index words[-1] below. */
if (wordIx < 1)
    errAbort("wordIx must be 1 or more, got %d", wordIx);
hash = newHash(14);
while (fgets(line, sizeof(line), in))
    {
    /* Keep a pristine copy: chopLine writes terminators into line. */
    strcpy(origLine, line);
    wordCount = chopLine(line, words);
    if (wordCount < 1 || words[0][0] == '#')
        continue;
    if (wordCount >= wordIx)
        {
        word = words[wordIx-1];
        if (!hashLookup(hash, word))
            {
            fprintf(out, "%s", origLine);
            hashAdd(hash, word, NULL);
            }
        }
    }
return 0;
}
// Reads the animation motion data section from an already-open file.
// The first line (fetched with chopLine) must parse as an unsigned count;
// it is stored in animationMotionDataLines.  Motion-data objects are then
// appended and asked to read themselves until the running line counter
// reaches that count.
// Returns false if the file is null or closed, the count line cannot be
// read or parsed, the file is already at its end after the count line, or
// any motion-data object fails to read.
bool ProjectAnimData::readMotionOnly(QFile *file){
    if (!file || !file->isOpen()){
        return false;
    }

    QByteArray line;
    auto ok = false;
    auto lineCount = 0UL;
    if (!chopLine(file, line, lineCount)){
        return false;
    }

    auto blocksize = line.toULong(&ok);
    if (!ok){
        return false;
    }
    animationMotionDataLines = blocksize;

    // Nothing follows the count line.
    if (file->atEnd()){
        return false;
    }

    // Each read() advances lineCount; keep going until the block is covered.
    lineCount = 0;
    while (lineCount < blocksize){
        animationMotionData.append(new SkyrimAnimationMotionData(this));
        if (!animationMotionData.last()->read(file, lineCount)){
            return false;
        }
    }
    return true;
}
struct hash *makePairHash(char *pairFile) /* Make up a hash table out of paired ESTs. */ { FILE *f = mustOpen(pairFile, "r"); char line[256]; char *words[3]; int wordCount; int lineCount = 0; struct hash *hash; struct hashEl *h5, *h3; struct estPair *ep; char *name5, *name3; hash = newHash(19); while (fgets(line, sizeof(line), f)) { ++lineCount; wordCount = chopLine(line, words); if (wordCount == 0) continue; if (wordCount != 2) errAbort("%d words in pair line %d of %s", wordCount, lineCount, pairFile); name5 = words[0]; name3 = words[1]; AllocVar(ep); h5 = hashAdd(hash, name5, ep); h3 = hashAdd(hash, name3, ep); ep->name5 = h5->name; ep->name3 = h3->name; slAddHead(&estPairList, ep); } printf("Read %d lines of pair info\n", lineCount); return hash; }
struct psl *nextPsl(struct lineFile *lf)
/* Fetch the next line of lf and load it as a psl.  Returns NULL at end of
 * file.  Aborts if the line does not split into 21 words. */
{
char *text;
int textSize;
char *fields[32];
int fieldCount;

if (!lineFileNext(lf, &text, &textSize))
    return NULL;

fieldCount = chopLine(text, fields);
if (fieldCount != 21)
    {
    errAbort("Bad line %d of %s, %d words expecting %d",
             lf->lineIx, lf->fileName, fieldCount, 21);
    return NULL;
    }
return pslLoad(fields);
}
static boolean parseBlockLine(struct blastFile *bf, int *startRet, int *endRet, struct dyString *seq) /* read and parse the next target or query line, like: * Query: 26429 taccttgacattcctcagtgtgtcatcatcgttctctcctccaaacggcgagagtccgga 26488 * * also handle broken NCBI tblastn output like: * Sbjct: 1181YYGEQRSTNGQTIQLKTQVFRRFPDDDDESEDHDDPDNAHESPEQEGAEGHFDLHYYENQ 1360 * * Ignores and returns FALSE on bogus records generated by PSI BLAST, such as * Query: 0 -------------------------- * Sbjct: 38 PPGPPGVAGGNQTTVVVIYGPPGPPG 63 * Query: 0 * Sbjct: 63 63 * If FALSE is returned, the output parameters will be unchanged. */ { char* line = bfNeedNextLine(bf); int a, b, s, e; char *words[16]; int wordCount = chopLine(line, words); if ((wordCount < 2) || (wordCount > 4) || !(sameString("Query:", words[0]) || sameString("Sbjct:", words[0]))) bfSyntax(bf); /* look for one of the bad formats to ignore, as described above */ if (((wordCount == 2) && isAllDigits(words[1])) || ((wordCount == 3) && isAllDigits(words[1]) && isAllDigits(words[2])) || ((wordCount == 3) && isAllDigits(words[1]) && isAllDashes(words[2]))) { bfWarn(bf, "Ignored invalid alignment format for aligned sequence pair"); return FALSE; } /* special handling for broken output with no space between start and * sequence */ if (wordCount == 3) { char *p; if (!isdigit(words[1][0]) || !isdigit(words[2][0])) bfSyntax(bf); a = atoi(words[1]); b = atoi(words[2]); p = words[1]; while ((*p != '\0') && (isdigit(*p))) p++; dyStringAppend(seq, p); } else { if (!isdigit(words[1][0]) || !isdigit(words[3][0])) bfSyntax(bf); a = atoi(words[1]); b = atoi(words[3]); dyStringAppend(seq, words[2]); } s = min(a,b); e = max(a,b); *startRet = min(s, *startRet); *endRet = max(e, *endRet); return TRUE; }
void addCtgFile(char *liftFileName, struct ctgPos **pCtgList) /* Create ctgPos's out of liftSpecs in liftFile. */ { struct lineFile *lf = lineFileOpen(liftFileName, TRUE); int lineSize, wordCount; char *line, *words[16]; struct liftSpec lift; struct ctgPos *ctg; printf("Processing %s\n", liftFileName); while (lineFileNext(lf, &line, &lineSize)) { wordCount = chopLine(line, words); if (wordCount == 0) continue; if (wordCount != 5) errAbort("Expecting 5 words line %d of %s", lf->lineIx, lf->fileName); liftSpecStaticLoad(words, &lift); AllocVar(ctg); ctg->contig = cloneString(skipPastSlash(lift.oldName)); ctg->size = lift.oldSize; ctg->chrom = cloneString(lift.newName); ctg->chromStart = lift.offset; ctg->chromEnd = lift.offset + lift.oldSize; slAddHead(pCtgList, ctg); } lineFileClose(&lf); }
void checkInputOpenFiles(struct inInfo *array, int count) /* Make sure all of the input is there and of right format before going forward. Since * this is going to take a while we want to fail fast. */ { int i; for (i=0; i<count; ++i) { struct inInfo *in = &array[i]; switch (in->type) { case itBigWig: { /* Just open and close, it will abort if any problem. */ in->bbi = bigWigFileOpen(in->fileName); break; } case itPromoterBed: case itUnstrandedBed: case itBlockedBed: { struct lineFile *lf = in->lf = lineFileOpen(in->fileName, TRUE); char *line; lineFileNeedNext(lf, &line, NULL); char *dupe = cloneString(line); char *row[256]; int wordCount = chopLine(dupe, row); struct bed *bed = NULL; switch (in->type) { case itPromoterBed: lineFileExpectAtLeast(lf, 6, wordCount); bed = bedLoadN(row, 6); char strand = bed->strand[0]; if (strand != '+' && strand != '-') errAbort("%s must be stranded, got %s in that field", lf->fileName, row[6]); break; case itUnstrandedBed: lineFileExpectAtLeast(lf, 4, wordCount); bed = bedLoadN(row, 4); break; case itBlockedBed: lineFileExpectAtLeast(lf, 4, wordCount); bed = bedLoadN(row, 12); break; default: internalErr(); break; } bedFree(&bed); freez(&dupe); lineFileReuse(lf); break; } default: internalErr(); break; } } }
struct cmChrom *wuParse(char *inName, struct hash *cloneHash, struct cloneInfo **pCloneList, struct hash *ctgHash) /* Parse wash U style clone map into common * intermediate format. */ { struct lineFile *in = lineFileOpen(inName, TRUE); int lineSize; char *line; char *words[16]; int wordCount; char chromName[32]; char lastChromName[32]; boolean isOrdered; char *s; struct cmChrom *chromList = NULL, *chrom = NULL; struct hash *ntHash = newHash(0); strcpy(lastChromName, ""); while (lineFileNext(in, &line, &lineSize)) { struct cmContig **pContigList; if (line[0] == '#') continue; if (!startsWith("start human SUPERLINK", line)) continue; wordCount = chopLine(line, words); if (wordCount != 5 && wordCount != 4) errAbort("Odd start line %d of %s\n", in->lineIx, in->fileName); if (words[wordCount-1][0] != '*') errAbort("Odd start line %d of %s\n", in->lineIx, in->fileName); s = strrchr(words[2], '.'); if (s == NULL) errAbort("Couldn't find chromosome line %d of 5s\n", in->lineIx, in->fileName); s += 1; strncpy(chromName, s, sizeof(chromName)); if (!sameString(chromName, lastChromName)) { strcpy(lastChromName, chromName); printf("Reading %s\n", chromName); AllocVar(chrom); chrom->name = cloneString(chromName); slAddHead(&chromList, chrom); } isOrdered = sameWord(words[3], "ORDERED"); pContigList = (isOrdered ? &chrom->orderedList : &chrom->randomList); if (*pContigList != NULL) errAbort("Duplicate chromosome %s %s", chromName, words[3]); if (isFinChrom(chromName)) continue; if (sameString(chromName, "NA")) readNa(in, chrom, pContigList, cloneHash, pCloneList, ctgHash); else readContigList(in, chrom, pContigList, sameString(chromName, "COMMIT"), !isOrdered, cloneHash, pCloneList, ctgHash, ntHash); } slReverse(&chromList); return chromList; }
void alignNt(char *nt) /* Do alignments of draft bacs against one NT. */ { char indexFileName[512]; char ntFaName[512]; struct lineFile *indexLf; int lineSize; char *line; char *words[3]; int wordCount; struct patSpace *ps; struct dnaSeq *ntSeq; printf("<H1>Check Layout of %s</H1>\n", nt); printf("<PRE>"); sprintf(ntFaName, "%s/p%s.fa", faDir, nt); ntSeq = faReadAllDna(ntFaName); ps = makePatSpace(&ntSeq, 1, oocFile, 10, 500); sprintf(indexFileName, "%s/%s.index", indexDir, nt); uglyf("Checking out %s and %s\n", indexFileName, ntFaName); indexLf = lineFileOpen(indexFileName, TRUE); while (lineFileNext(indexLf, &line, &lineSize)) { wordCount = chopLine(line, words); if (wordCount > 0) { char bacFaName[512]; struct dnaSeq *contigList, *contig; char *bacAcc = words[0]; char *s = strrchr(bacAcc, '.'); if (s != NULL) *s = 0; uglyf("%s\n", bacAcc); sprintf(bacFaName, "%s/%s.fa", faDir, bacAcc); contigList = faReadAllDna(bacFaName); for (contig = contigList; contig != NULL; contig = contig->next) { boolean isRc; uglyf(" %s\n", contig->name); for (isRc = FALSE; isRc <= TRUE; isRc += 1) { struct ssBundle *bunList, *bun; bunList = ssFindBundles(ps, contig, contig->name, ffTight); for (bun = bunList; bun != NULL; bun = bun->next) { showBundle(bun, isRc); } ssBundleFreeList(&bunList); reverseComplement(contig->dna, contig->size); } } freeDnaSeqList(&contigList); } } lineFileClose(&indexLf); freeDnaSeqList(&ntSeq); }
void writeBedTab(char *fileName, struct bedStub *bedList, int bedSize) /* Write out bed list to tab-separated file. */ { struct bedStub *bed; FILE *f = mustOpen(fileName, "w"); char *words[64]; int i, wordCount; for (bed = bedList; bed != NULL; bed = bed->next) { if (!noBin) if (fprintf(f, "%u\t", hFindBin(bed->chromStart, bed->chromEnd)) <= 0) writeFailed(fileName); if (strictTab) wordCount = chopTabs(bed->line, words); else wordCount = chopLine(bed->line, words); for (i=0; i<wordCount; ++i) { /* new definition for old "reserved" field, now itemRgb */ /* and when itemRgb, it is a comma separated string r,g,b */ if (itemRgb && (i == 8)) { char *comma; /* Allow comma separated list of rgb values here */ comma = strchr(words[8], ','); if (comma) { int itemRgb = 0; if (-1 == (itemRgb = bedParseRgb(words[8]))) errAbort("ERROR: expecting r,g,b specification, " "found: '%s'", words[8]); else if (fprintf(f, "%d", itemRgb) <= 0) writeFailed(fileName); verbose(2, "itemRgb: %s, rgb: %#x\n", words[8], itemRgb); } else if (fputs(words[i], f) == EOF) writeFailed(fileName); } else if (fputs(words[i], f) == EOF) writeFailed(fileName); if (i == wordCount-1) { if (fputc('\n', f) == EOF) writeFailed(fileName); } else if (fputc('\t', f) == EOF) writeFailed(fileName); } } fclose(f); }
void liftGl(char *destFile, struct hash *liftHash, int sourceCount, char *sources[]) /* Lift up coordinates in .gl file. */ { char dirBuf[256], chromName[256]; int i; char *source; char *contig; FILE *dest = mustOpen(destFile, "w"); struct lineFile *lf = NULL; int lineSize, wordCount; char *line, *words[32]; struct liftSpec *spec; int offset; if (how == carryMissing) warn("'carry' doesn't work for .gl files, ignoring"); for (i=0; i<sourceCount; ++i) { source = sources[i]; verbose(1, "Processing %s\n", source); contig = contigInDir(source, dirBuf); verbose(2,"#\tcontig: %s, source: %s, dirBuf: %s\n", contig, source, dirBuf); if (!startsWith("ctg", contig) && !startsWith("NC_", contig) && !startsWith("NT_", contig) && !startsWith("NG_", contig)) { sprintf(chromName, "chr%s", contig); contig = chromName; verbose(2,"#\tcontig: %s, chromName: %s\n", contig, chromName); } spec = findLift(liftHash, contig, lf); if (spec == NULL) continue; cantHandleSpecRevStrand(spec); offset = spec->offset; lf = lineFileMayOpen(source, TRUE); if (lf == NULL) { warn("%s doesn't exist, skipping", source); continue; } while (lineFileNext(lf, &line, &lineSize)) { int s, e; if ((wordCount = chopLine(line, words)) != 4) errAbort("Bad line %d of %s", lf->lineIx, lf->fileName); s = atoi(words[1]); e = atoi(words[2]); fprintf(dest, "%s\t%d\t%d\t%s\n", words[0], s+offset, e+offset, words[3]); } lineFileClose(&lf); if (dots) verbose(1, "\n"); } }
void edwFixRevoked(char *database, char *inFile) /* edwFixRevoked - Mark as deprecated files that are revoked in ENCODE2. */ /* inFile is in format: * metaVariable objStatus revoked [- reason] * metaObject name */ { struct sqlConnection *conn = edwConnect(); struct lineFile *lf = lineFileOpen(inFile, TRUE); char *line; char *defaultReason = "Revoked in ENCODE2"; char *reason = defaultReason; while (lineFileNextReal(lf, &line)) { if (startsWithWord("metaVariable", line)) { char *pattern = "metaVariable objStatus revoked"; if (startsWithWord(pattern, line)) { reason = skipLeadingSpaces(line + strlen(pattern)); if (isEmpty(reason)) reason = defaultReason; else { if (reason[0] == '-') reason = skipLeadingSpaces(reason + 1); reason = cloneString(reason); } } else errAbort("??? %s\n", line); } else if (startsWithWord("metaObject", line)) { char *row[3]; int wordCount = chopLine(line, row); if (wordCount != 2) errAbort("Strange metaobject line %d of %s\n", lf->lineIx, lf->fileName); char *prefix = row[1]; if (!startsWith("wgEncode", prefix)) errAbort("Strange object line %d of %s\n", lf->lineIx, lf->fileName); char query[512]; sqlSafef(query, sizeof(query), "select * from edwFile where submitFileName like '%s/%%/%s%%'", database, prefix); struct edwFile *ef, *efList = edwFileLoadByQuery(conn, query); printf("# %s %s\n", prefix, reason); for (ef = efList; ef != NULL; ef = ef->next) { long long id = ef->id; printf("update edwFile set deprecated='%s' where id=%lld;\n", reason, id); } } else errAbort("Unrecognized first word in %s\n", line); } }
void addStageInfo(char *gsDir, struct hash *cloneHash) /* Add info about which file and what stage clone is in. */ /* TSF - This is no longer used due to unavailability of *.finf files - 4/7/2003 */ { static char *finfFiles[] = {"ffa/finished.finf", "ffa/draft.finf", "ffa/predraft.finf", "ffa/extras.finf" }; static char stages[] = "FDPD"; struct lineFile *lf; char *line; char *words[7]; int numStages = strlen(stages); int i; char pathName[512]; char *finfFile, stage; int warnsLeft = maxWarn; /* Only show first maxWarn warnings about missing clones. */ char cloneName[256]; struct clonePos *clone; int wordCount, cloneCount; for (i=0; i<numStages; ++i) { finfFile = finfFiles[i]; stage = stages[i]; sprintf(pathName, "%s/%s", gsDir, finfFile); printf("Processing %s\n", pathName); lf = lineFileOpen(pathName, TRUE); cloneCount = 0; while (lineFileNext(lf, &line, NULL)) { wordCount = chopLine(line, words); assert(wordCount == 7); strncpy(cloneName, words[1], sizeof(cloneName)); chopSuffix(cloneName); if ((clone = hashFindVal(cloneHash, cloneName)) == NULL) { if (warnsLeft > 0) { --warnsLeft; warn("%s is in %s but not in ooDir/*/*.gl", cloneName, pathName); } else if (warnsLeft == 0) { --warnsLeft; warn("(Truncating additional warnings)"); } continue; } clone->stage[0] = stage; cloneCount++; } lineFileClose(&lf); printf("Got %d clones in %s\n", cloneCount, pathName); } }
static void agpToFa(char *agpFile, char *agpSeq, char *faOut, char *seqDir) /* agpToFa - Convert a .agp file to a .fa file. */ { struct lineFile *lf = lineFileOpen(agpFile, TRUE); char *line, *words[16]; int lineSize, wordCount; int lastPos = 0; struct agpFrag *agpList = NULL, *agp; FILE *f = mustOpen(faOut, "w"); char *prevChrom = NULL; verbose(2,"#\tprocessing AGP file: %s\n", agpFile); while (lineFileNext(lf, &line, &lineSize)) { if (line[0] == 0 || line[0] == '#' || line[0] == '\n') continue; wordCount = chopLine(line, words); if (wordCount < 5) errAbort("Bad line %d of %s: need at least 5 words, got %d\n", lf->lineIx, lf->fileName, wordCount); if (! (sameWord("all", agpSeq) || sameWord(words[0], agpSeq))) continue; if (prevChrom != NULL && !sameString(prevChrom, words[0])) { agpToFaOne(&agpList, agpFile, prevChrom, seqDir, lastPos, f); lastPos = 0; } if (words[4][0] != 'N' && words[4][0] != 'U') { lineFileExpectAtLeast(lf, 9, wordCount); agp = agpFragLoad(words); /* file is 1-based but agpFragLoad() now assumes 0-based: */ agp->chromStart -= 1; agp->fragStart -= 1; if (agp->chromStart != lastPos) errAbort("Start doesn't match previous end line %d of %s\n", lf->lineIx, lf->fileName); if (agp->chromEnd - agp->chromStart != agp->fragEnd - agp->fragStart) errAbort("Sizes don't match in %s and %s line %d of %s\n", agp->chrom, agp->frag, lf->lineIx, lf->fileName); slAddHead(&agpList, agp); lastPos = agp->chromEnd; } else { lastPos = lineFileNeedNum(lf, words, 2); } if (prevChrom == NULL || !sameString(prevChrom, words[0])) { freeMem(prevChrom); prevChrom = cloneString(words[0]); } } agpToFaOne(&agpList, agpFile, prevChrom, seqDir, lastPos, f); }
static void parseDatabaseLines(struct blastFile *bf, char *line, struct blastQuery *bq) /* Process something like: * Database: chr22.fa * 977 sequences; 95,550,797 total letters */ { static struct dyString *tmpBuf = NULL; char *words[16]; int wordCount; if (bq->database != NULL) bfError(bf, "already parse Database:"); if (tmpBuf == NULL) tmpBuf = dyStringNew(512); /* parse something like * Database: celegans98 * some versions of blastp include the absolute path, but * then split it across lines. */ wordCount = chopLine(line, words); if (wordCount < 2) bfError(bf, "Expecting database name"); dyStringClear(tmpBuf); dyStringAppend(tmpBuf, words[1]); while (line = bfNeedNextLine(bf), !isspace(line[0])) { dyStringAppend(tmpBuf, line); } bq->database = cloneString(tmpBuf->string); /* Process something like: * 977 sequences; 95,550,797 total letters */ wordCount = chopLine(line, words); if (wordCount < 3 || !isdigit(words[0][0]) || !isdigit(words[2][0])) bfError(bf, "Expecting database info"); decomma(words[0]); decomma(words[2]); bq->dbSeqCount = atoi(words[0]); bq->dbBaseCount = atoi(words[2]); }
void viewWaba(char *wabName) /* Show human readable waba alignment. */ { struct lineFile *lf = lineFileOpen(wabName, TRUE); int lineSize; char *line; char *qSym; char *tSym; char *hSym; int symCount; int wordCount, partCount; char *words[16], *parts[4]; int qStart, qEnd, tStart, tEnd; char strand; while (lineFileNext(lf, &line, &lineSize)) { printf("%s\n", line); wordCount = chopLine(line, words); if (wordCount != 10) errAbort("Funny info line %d of %s\n", lf->lineIx, lf->fileName); partCount = chopString(words[6], ":-", parts, ArraySize(parts)); if (partCount != 3) errAbort("Bad query range line %d of %s\n", lf->lineIx, lf->fileName); qStart = atoi(parts[1]); qEnd = atoi(parts[2]); strand = words[7][0]; partCount = chopString(words[8], ":-", parts, ArraySize(parts)); if (partCount != 3) errAbort("Bad target range line %d of %s\n", lf->lineIx, lf->fileName); tStart = atoi(parts[1]); tEnd = atoi(parts[2]); if (!lineFileNext(lf, &line, &lineSize)) errAbort("Unexpected EOF."); symCount = strlen(line); qSym = cloneString(line); if (!lineFileNext(lf, &line, &lineSize)) errAbort("Unexpected EOF."); tSym = cloneString(line); if (!lineFileNext(lf, &line, &lineSize)) errAbort("Unexpected EOF."); hSym = cloneString(line); if (strand == '+') xenShowAli(qSym, tSym, hSym, symCount, stdout, qStart, tStart, '+', '+', 60); else xenShowAli(qSym, tSym, hSym, symCount, stdout, qEnd, tStart, '-', '+', 60); freeMem(hSym); freeMem(tSym); freeMem(qSym); } lineFileClose(&lf); }