Esempio n. 1
struct mapPos *readInfoFile(char *mapName)
/* Read maps from file. */
struct lineFile *lf = lineFileOpen(mapName, TRUE);
int lineSize, wordCount;
char *line, *words[16];
struct mapPos *list = NULL, *el;

lineFileNeedNext(lf, &line, &lineSize);
while (lineFileNext(lf, &line, &lineSize))
    if (line[0] == '#')
    wordCount = chopLine(line, words);
    lineFileExpectWords(lf, 3, wordCount);
    el->cloneName = cloneString(words[0]);
    el->pos = atoi(words[1]);
    el->phase = atoi(words[2]);
    slAddHead(&list, el);
return list;
Esempio n. 2
static void parseColumnHeaderRow(struct vcfFile *vcff, char *line)
/* Make sure column names are as we expect, and store genotype sample IDs if any are given. */
if (line[0] != '#')
    vcfFileErr(vcff, "Expected to find # followed by column names (\"#CHROM POS ...\"), "
	       "not \"%s\"", line);
char *words[VCF_MAX_COLUMNS];
int wordCount = chopLine(line+1, words);
if (wordCount >= VCF_MAX_COLUMNS)
    vcfFileErr(vcff, "header contains at least %d columns; "
	       "VCF_MAX_COLUMNS may need to be increased in vcf.c!", VCF_MAX_COLUMNS);
expectColumnName(vcff, "CHROM", words, 0);
expectColumnName(vcff, "POS", words, 1);
expectColumnName(vcff, "ID", words, 2);
expectColumnName(vcff, "REF", words, 3);
expectColumnName(vcff, "ALT", words, 4);
expectColumnName2(vcff, "QUAL", "PROB", words, 5);
expectColumnName(vcff, "FILTER", words, 6);
expectColumnName(vcff, "INFO", words, 7);
if (wordCount > 8)
    expectColumnName(vcff, "FORMAT", words, 8);
    if (wordCount < 10)
	vcfFileErr(vcff, "FORMAT column is given, but no sample IDs for genotype columns...?");
    vcff->genotypeCount = (wordCount - 9);
    vcff->genotypeIds = vcfFileAlloc(vcff, vcff->genotypeCount * sizeof(char *));
    int i;
    for (i = 9;  i < wordCount;  i++)
	vcff->genotypeIds[i-9] = vcfFileCloneStr(vcff, words[i]);
Esempio n. 3
struct blastFile *blastFileOpenVerify(char *fileName)
/* Open file, read and verify header. */
struct blastFile *bf;
char *line;
char *words[16];
int wordCount;
struct lineFile *lf;

bf->lf = lf = lineFileOpen(fileName, TRUE);
bf->fileName = cloneString(fileName);

/* Parse first line - something like: */
line = bfNeedNextLine(bf);
wordCount = chopLine(line, words);
if (wordCount < 3)
bf->program = cloneString(words[0]);
bf->version = cloneString(words[1]);
bf->buildDate = cloneString(words[2]);
if (!wildMatch("*BLAST*", bf->program))
if (!isdigit(bf->version[0]))
if (bf->buildDate[0] != '[')
return bf;
Esempio n. 4
static struct gfRange *gfQuerySeq(int conn, struct dnaSeq *seq)
/* Ask server for places sequence hits. */
struct gfRange *rangeList = NULL, *range;
char buf[256], *row[6];
int rowSize;

startSeqQuery(conn, seq, "query");

/* Read results line by line and save in list, and return. */
for (;;)
    netRecieveString(conn, buf);
    if (sameString(buf, "end"))
    else if (startsWith("Error:", buf))
	gfServerWarn(seq, buf);
	rowSize = chopLine(buf, row);
	if (rowSize < 6)
	    errAbort("Expecting 6 words from server got %d", rowSize);
	range = gfRangeLoad(row);
	slAddHead(&rangeList, range);
return rangeList;
Esempio n. 5
boolean wormGeneForOrf(char *orfName, char *geneNameBuf, int bufSize)
/* Look for gene type (unc-12 or something) synonym for cosmid.N name. */
FILE *f;
char fileName[512];
char lineBuf[512];
int nameLen = strlen(orfName);
boolean ok = FALSE;

sprintf(fileName, "%sorf2gene", wormFeaturesDir());
f = mustOpen(fileName, "r");
while (fgets(lineBuf, sizeof(lineBuf), f))
    if (strncmp(lineBuf, orfName, nameLen) == 0 && lineBuf[nameLen] == ' ')
        char *words[2];
        int wordCount;
        wordCount = chopLine(lineBuf, words);
        assert((int)strlen(words[1]) < bufSize);
        strncpy(geneNameBuf, words[1], bufSize);
        ok = TRUE;
return ok;
int findBedSize(char *fileName, struct lineFile **retLf)
/* Read first line of file and figure out how many words in it. */
/* Input file could be stdin, in which case we really don't want to open,
 * read, and close it here.  So if retLf is non-NULL, return the open 
 * linefile (having told it to reuse the line we just read). */
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *words[64], *line;
int wordCount;

if (!lineFileNextReal(lf, &line))
    if (ignoreEmpty)
line = cloneString(line);
if (strictTab)
    wordCount = chopTabs(line, words);
    wordCount = chopLine(line, words);
if (wordCount == 0)
    errAbort("%s appears to be empty", fileName);
if (retLf != NULL)
    *retLf = lf;
return wordCount;
Esempio n. 7
struct lump *readLumps(char *fileName)
/* Read in lumps from file. */
struct lump *lumpList = NULL, *lump = NULL;
char line[1024];
int lineCount;
char *words[3];
int wordCount;
boolean isIndented;
FILE *f = mustOpen(fileName, "r");

while (fgets(line, sizeof(line), f))
    isIndented = isspace(line[0]);
    wordCount = chopLine(line, words);
    if (wordCount == 0)
        continue;   /* Allow blank lines. */
    if (isIndented)
        if (wordCount != 2 || !isdigit(words[0][0]))
            errAbort("Bad line %d of %s\n", lineCount, fileName);
        lump->count += atoi(words[0]);
        lump->seq = cloneString(words[0]);
        slAddHead(&lumpList, lump);
return lumpList;
Esempio n. 8
struct pgo *readC2g(char *fileName)
/* Read a C2g file into memory. */
FILE *f = mustOpen(fileName, "r");
struct pgo *list = NULL, *el;
char lineBuf[128];
char *words[4];
int wordCount;
int lineCount = 0;

while (fgets(lineBuf, sizeof(lineBuf), f) != NULL)
    wordCount = chopLine(lineBuf, words);
    if (wordCount == 0)
        continue;   /* Ignore blank lines. */
    if (wordCount != 3)
        errAbort("Strange line starting with %s line %d of %s",
            words[0], lineCount, fileName);
    if (!wormParseChromRange(words[0], &el->chrom, &el->start, &el->end))
        errAbort("Bad chromosome range line %d of %s", lineCount, fileName);
    el->strand = words[1][0];
    el->gene = cloneString(words[2]);
    slAddHead(&list, el);
return list;
Esempio n. 9
char *findEnsTrans(struct lineFile *lf, char *line)
/* Find transcript name out of ensemble line.  Squawk and die
 * if a problem. */
char *words[32];
int wordCount, i;
char *pat = "Translation:";
int patSize = strlen(pat);

wordCount = chopLine(line+1, words);
for (i=0; i<wordCount; ++i)
    if (startsWith(pat, words[i]))
        return words[i] + patSize;
// Ensembl appears to have changed their format recently; handle both formats.
wordCount = chopString(line+1, "|", words, ArraySize(words));
if (wordCount >= 3)
    char *ptr = strchr(words[2], '.');
    if (ptr != NULL) *ptr = 0;
errAbort("Couldn't find '%s' key for transcript name line %d of %s",
    pat, lf->lineIx, lf->fileName);
return NULL;
int main(int argc, char *argv[])
char *inName, *name;
int chunkSize = 4048*1024;
FILE *in;
int accSize = 0;
int newAccSize;
int oneSize;
char line[512];
int lineCount;
char *words[16];
int wordCount;
struct slName *bacs = NULL, *bn;
char *dirName;
char *outDir;

if (argc != 4)
inName = argv[1];
dirName = argv[2];
outDir = argv[3];

in = mustOpen(inName, "r");

while (fgets(line, sizeof(line), in))
    char *sizeString;
    wordCount = chopLine(line, words);
    if (wordCount == 0)
    if (wordCount != 9)
	errAbort("Line %d of %s doesn't look like an ls -l line", lineCount, inName);
    sizeString = words[4];
    if (!isdigit(sizeString[0]))
	errAbort("Line %d of %s doesn't look like an ls - l line", lineCount, inName);
    name = words[8];
    oneSize = atoi(sizeString);
    newAccSize = accSize + oneSize;
    if (newAccSize > chunkSize)
	finishJob(&bacs, accSize);
	accSize = oneSize;
	if (oneSize > chunkSize)
	    warn("Size %d of %s exceed chunk size %d", oneSize, name, chunkSize);
	accSize = newAccSize;
    bn = newSlName(name);
    slAddHead(&bacs, bn);
if (bacs != NULL)
    finishJob(&bacs, accSize);
printf("%d total jobs\n", jobCount);
writeInLists(outDir, dirName);
//writeJobs("job", "in", startMachine, stopMachine, "cc");
struct consWiggle *wigMafWiggles(char *db, struct trackDb *tdb)
/* get conservation wiggle table names and labels from trackDb setting,
   ignoring those where table doesn't exist */
char *fields[20];
int fieldCt;
int i;
char *wigTable, *wigLabel;
struct consWiggle *wig, *wigList = NULL;
char *setting = trackDbSetting(tdb, CONS_WIGGLE);
if (!setting)
    return NULL;
fieldCt = chopLine(cloneString(setting), fields);
for (i = 0; i < fieldCt; i += 3)
    wigTable = fields[i];
    if (hTableExists(db, wigTable));
        wig->table = cloneString(wigTable);
        wigLabel = (i+1 == fieldCt ? DEFAULT_CONS_LABEL : fields[i+1]);
        wig->leftLabel = cloneString(wigLabel);
        wigLabel = (i+2 >= fieldCt ? wig->leftLabel : fields[i+2]);
        wig->uiLabel = cloneString(wigLabel);
        slAddTail(&wigList, wig);
return wigList;
void getSizes(char *fileName, int *retU, int *retN)
/* Add together sizes in a gold  file */
struct lineFile *lf = lineFileOpen(fileName, TRUE);
int lineSize, wordCount;
char *line, *words[16];
int start,end,size;
int u = 0, n = 0;

while (lineFileNext(lf, &line, &lineSize))
    wordCount = chopLine(line, words);
    if (wordCount < 8)
	errAbort("Short line %d of %s\n", lf->lineIx, lf->fileName);
    start = atoi(words[1]) - 1;
    end = atoi(words[2]);
    size = end-start;
    if (words[4][0] == 'N' || words[4][0] == 'U')
	n += size;
	u += size;
*retU = u;
*retN = n;
Esempio n. 13
static void getXrefInfo(struct sqlConnection *conn,
			char **retXrefTable, char **retIdField,
			char **retAliasField)
/* See if curTrack specifies an xref/alias table for lookup of IDs. */
char *xrefSpec = curTrack ? trackDbSetting(curTrack, "idXref") : NULL;
char *xrefTable = NULL, *idField = NULL, *aliasField = NULL;
if (xrefSpec != NULL)
    char *words[3];
    chopLine(cloneString(xrefSpec), words);
    if (isEmpty(words[2]))
	errAbort("trackDb error: track %s, setting idXref must be followed "
		 "by three words (xrefTable, idField, aliasField).",
    xrefTable = words[0];
    idField = words[1];
    aliasField = words[2];
    if (!sqlTableExists(conn, xrefTable) ||
	sqlFieldIndex(conn, xrefTable, idField) < 0 ||
	sqlFieldIndex(conn, xrefTable, aliasField) < 0)
	xrefTable = idField = aliasField = NULL;
if (retXrefTable != NULL)
    *retXrefTable = xrefTable;
if (retIdField != NULL)
    *retIdField = idField;
if (retAliasField != NULL)
    *retAliasField = aliasField;
Esempio n. 14
void rt1dFind(char *tabFile, char *treeFile, char *chrom, bits32 start, bits32 end)
/* rt1dFind - find items in 1-D range tree.
 * Asks the tree in treeFile for the blocks overlapping chrom:start-end, then
 * seeks to each block in tabFile and reads lines until block->size bytes are
 * consumed.  Each line must have exactly three words (chrom, start, end);
 * lines on the same chrom that intersect the query range are printed to
 * stdout. */
{
struct lineFile *lf = lineFileOpen(tabFile, TRUE);
struct crTreeFile *crf = crTreeFileOpen(treeFile);
struct fileOffsetSize *block, *blockList = crTreeFindOverlappingBlocks(crf, chrom, start, end);
verbose(2, "Got %d overlapping blocks\n", slCount(blockList));
for (block = blockList; block != NULL; block = block->next)
    {
    verbose(2, "block->offset %llu, block->size %llu\n", block->offset, block->size);
    lineFileSeek(lf, block->offset, SEEK_SET);
    bits64 sizeUsed = 0;
    while (sizeUsed < block->size)
        {
        char *line;
        int size;
        if (!lineFileNext(lf, &line, &size))
            errAbort("Couldn't read %s\n", lf->fileName);
        /* Chop a copy so the original line can still be printed whole. */
        char *parsedLine = cloneString(line);
        char *row[3];
        if (chopLine(parsedLine, row) != ArraySize(row))
            errAbort("Badly formatted line of %s\n%s", lf->fileName, line);
        char *bedChrom = row[0];
        bits32 bedStart = sqlUnsigned(row[1]);
        bits32 bedEnd = sqlUnsigned(row[2]);
        if (sameString(bedChrom, chrom) && rangeIntersection(bedStart, bedEnd, start, end) > 0)
            fprintf(stdout, "%s\n", line);
        freeMem(parsedLine);
        sizeUsed += size;
        }
    }
lineFileClose(&lf);
}
Esempio n. 15
int main(int argc, char *argv[])
/* Copy stdin to stdout, keeping only the first line seen for each distinct
 * value of word number wordIx (1-based, given as the single argument).
 * Blank lines and lines whose first word starts with '#' are dropped, as
 * are lines with fewer than wordIx words.
 * NOTE(review): the handling of blank and '#' lines was missing from the
 * visible source; skipping them is assumed. */
{
FILE *in = stdin;
FILE *out = stdout;
char origLine[1024];
char line[1024];
char *words[256];
int wordCount;
struct hash *hash;
int wordIx;
char *word;

if (argc != 2 || !isdigit(argv[1][0]))
    errAbort("Usage: %s wordIx", argv[0]);
wordIx = atoi(argv[1]);
if (wordIx < 1)
    errAbort("Usage: %s wordIx", argv[0]);   /* Index 0 would read words[-1]. */
hash = newHash(14);
while (fgets(line, sizeof(line), in))
    {
    /* chopLine modifies line, so keep an intact copy for output. */
    strcpy(origLine, line);
    wordCount = chopLine(line, words);
    if (wordCount < 1 || words[0][0] == '#')
        continue;
    if (wordCount >= wordIx)
        {
        word = words[wordIx-1];
        if (!hashLookup(hash, word))
            {
            fprintf(out, "%s", origLine);
            hashAdd(hash, word, NULL);
            }
        }
    }
return 0;
}
// Reads animation motion data from an already-open file.
// The first line (fetched with chopLine) must parse as an unsigned long; it
// is stored in animationMotionDataLines and used as the line budget. Motion
// data objects are then appended and read until lineCount reaches that budget.
// Returns false if the file is null or not open, if the first line cannot be
// read or parsed, if any motion data read fails, or if the file is already
// at its end after the first line; returns true otherwise.
bool ProjectAnimData::readMotionOnly(QFile *file){
    if (!file || !file->isOpen()){
        return false;
    }
    QByteArray line;
    auto ok = false;
    auto lineCount = 0UL;
    if (!chopLine(file, line, lineCount)){
        return false;
    }
    auto blocksize = line.toULong(&ok);
    if (!ok){
        return false;
    }
    animationMotionDataLines = blocksize;
    while (!file->atEnd()){ //Read individual projects...
        lineCount = 0;
        for (; lineCount < blocksize;){  //Get animation motion data...
            animationMotionData.append(new SkyrimAnimationMotionData(this));
            if (!animationMotionData.last()->read(file, lineCount)){
                return false;
            }
        }
        // Returns after the first pass, so this loop body runs at most once.
        return true;
    }
    return false;
}
Esempio n. 17
struct hash *makePairHash(char *pairFile)
/* Make up a hash table out of paired ESTs. */
FILE *f = mustOpen(pairFile, "r");
char line[256];
char *words[3];
int wordCount;
int lineCount = 0;
struct hash *hash;
struct hashEl *h5, *h3;
struct estPair *ep;
char *name5, *name3;

hash = newHash(19);
while (fgets(line, sizeof(line), f))
    wordCount = chopLine(line, words);
    if (wordCount == 0)
    if (wordCount != 2)
        errAbort("%d words in pair line %d of %s", wordCount, lineCount, pairFile);
    name5 = words[0];
    name3 = words[1];
    h5 = hashAdd(hash, name5, ep);
    h3 = hashAdd(hash, name3, ep);
    ep->name5 = h5->name;
    ep->name3 = h3->name;
    slAddHead(&estPairList, ep);
printf("Read %d lines of pair info\n", lineCount);
return hash;
struct psl *nextPsl(struct lineFile *lf)
/* Read next line from file and convert it to psl.  Return
 * NULL at eof. */
char *line;
int lineSize;
char *words[32];
int wordCount;
struct psl *psl;

if (!lineFileNext(lf, &line, &lineSize))
    return NULL;
wordCount = chopLine(line, words);
if (wordCount == 21)
    return pslLoad(words);
    errAbort("Bad line %d of %s, %d words expecting %d", lf->lineIx, lf->fileName, wordCount, 21);
    return NULL;
Esempio n. 19
static boolean parseBlockLine(struct blastFile *bf, int *startRet, int *endRet,
                           struct dyString *seq)
/* read and parse the next target or query line, like:
 *   Query: 26429 taccttgacattcctcagtgtgtcatcatcgttctctcctccaaacggcgagagtccgga 26488
 * also handle broken NCBI tblastn output like:
 * Ignores and returns FALSE on bogus records generated by PSI BLAST, such as
 *   Query: 0   --------------------------                                  
 *   Sbjct: 38  PPGPPGVAGGNQTTVVVIYGPPGPPG                                   63
 *   Query: 0                                                               
 *   Sbjct: 63                                                               63
 * If FALSE is returned, the output parameters will be unchanged.
char* line = bfNeedNextLine(bf);
int a, b, s, e;
char *words[16];
int wordCount = chopLine(line, words);
if ((wordCount < 2) || (wordCount > 4) || !(sameString("Query:", words[0]) || sameString("Sbjct:", words[0])))

/* look for one of the bad formats to ignore, as described above */
if (((wordCount == 2) && isAllDigits(words[1]))
    || ((wordCount == 3) && isAllDigits(words[1]) && isAllDigits(words[2]))
    || ((wordCount == 3) && isAllDigits(words[1]) && isAllDashes(words[2])))
    bfWarn(bf, "Ignored invalid alignment format for aligned sequence pair");
    return FALSE;

/* special handling for broken output with no space between start and
 * sequence */
if (wordCount == 3)
    char *p;
    if (!isdigit(words[1][0]) || !isdigit(words[2][0]))
    a = atoi(words[1]);
    b = atoi(words[2]);
    p = words[1];
    while ((*p != '\0') && (isdigit(*p)))
    dyStringAppend(seq, p);
    if (!isdigit(words[1][0]) || !isdigit(words[3][0]))
    a = atoi(words[1]);
    b = atoi(words[3]);
    dyStringAppend(seq, words[2]);
s = min(a,b);
e = max(a,b);
*startRet = min(s, *startRet);
*endRet = max(e, *endRet);
return TRUE;
Esempio n. 20
void addCtgFile(char *liftFileName, struct ctgPos **pCtgList)
/* Create ctgPos's out of liftSpecs in liftFile. */
struct lineFile *lf = lineFileOpen(liftFileName, TRUE);
int lineSize, wordCount;
char *line, *words[16];
struct liftSpec lift;
struct ctgPos *ctg;

printf("Processing %s\n", liftFileName);
while (lineFileNext(lf, &line, &lineSize))
    wordCount = chopLine(line, words);
    if (wordCount == 0)
    if (wordCount != 5)
        errAbort("Expecting 5 words line %d of %s", lf->lineIx, lf->fileName);
    liftSpecStaticLoad(words, &lift);
    ctg->contig = cloneString(skipPastSlash(lift.oldName));
    ctg->size = lift.oldSize;
    ctg->chrom = cloneString(lift.newName);
    ctg->chromStart = lift.offset;
    ctg->chromEnd = lift.offset + lift.oldSize;
    slAddHead(pCtgList, ctg);
Esempio n. 21
void checkInputOpenFiles(struct inInfo *array, int count)
/* Make sure all of the input is there and of right format before going forward. Since
 * this is going to take a while we want to fail fast. */
int i;
for (i=0; i<count; ++i)
    struct inInfo *in = &array[i];
    switch (in->type)
	case itBigWig:
	    /* Just open and close, it will abort if any problem. */
	    in->bbi = bigWigFileOpen(in->fileName);
	case itPromoterBed:
	case itUnstrandedBed:
	case itBlockedBed:
	    struct lineFile *lf = in->lf = lineFileOpen(in->fileName, TRUE);
	    char *line;
	    lineFileNeedNext(lf, &line, NULL);
	    char *dupe = cloneString(line);
	    char *row[256];
	    int wordCount = chopLine(dupe, row);
	    struct bed *bed = NULL;
	    switch (in->type)
		case itPromoterBed:
		    lineFileExpectAtLeast(lf, 6, wordCount);
		    bed = bedLoadN(row, 6);
		    char strand = bed->strand[0];
		    if (strand != '+' && strand != '-')
		        errAbort("%s must be stranded, got %s in that field", lf->fileName, row[6]);
		case itUnstrandedBed:
		    lineFileExpectAtLeast(lf, 4, wordCount);
		    bed = bedLoadN(row, 4);
		case itBlockedBed:
		    lineFileExpectAtLeast(lf, 4, wordCount);
		    bed = bedLoadN(row, 12);
Esempio n. 22
struct cmChrom *wuParse(char *inName, struct hash *cloneHash, 
	struct cloneInfo **pCloneList, struct hash *ctgHash)
/* Parse wash U style clone map into common
 * intermediate format. */
struct lineFile *in = lineFileOpen(inName, TRUE);
int lineSize;
char *line;
char *words[16];
int wordCount;
char chromName[32];
char lastChromName[32];
boolean isOrdered;
char *s;
struct cmChrom *chromList = NULL, *chrom = NULL;
struct hash *ntHash = newHash(0);

strcpy(lastChromName, "");
while (lineFileNext(in, &line, &lineSize))
    struct cmContig **pContigList;

    if (line[0] == '#') continue;
    if (!startsWith("start human SUPERLINK", line))
    wordCount = chopLine(line, words);
    if (wordCount != 5 && wordCount != 4)
	errAbort("Odd start line %d of %s\n", in->lineIx, in->fileName);
    if (words[wordCount-1][0] != '*')
	errAbort("Odd start line %d of %s\n", in->lineIx, in->fileName);
    s = strrchr(words[2], '.');
    if (s == NULL)
	errAbort("Couldn't find chromosome line %d of 5s\n", in->lineIx, in->fileName);
    s += 1;
    strncpy(chromName, s, sizeof(chromName));
    if (!sameString(chromName, lastChromName))
	strcpy(lastChromName, chromName);
	printf("Reading %s\n", chromName);
	chrom->name = cloneString(chromName);
	slAddHead(&chromList, chrom);
    isOrdered = sameWord(words[3], "ORDERED");
    pContigList = (isOrdered ? &chrom->orderedList : &chrom->randomList);
    if (*pContigList != NULL)
	errAbort("Duplicate chromosome %s %s",
		chromName, words[3]);
    if (isFinChrom(chromName))
    if (sameString(chromName, "NA"))
	readNa(in, chrom, pContigList, cloneHash, pCloneList, ctgHash);
	readContigList(in, chrom, pContigList, sameString(chromName, "COMMIT"), 
		!isOrdered, cloneHash, pCloneList, ctgHash, ntHash);
return chromList;
Esempio n. 23
void alignNt(char *nt)
/* Do alignments of draft bacs against one NT. */
char indexFileName[512];
char ntFaName[512];
struct lineFile *indexLf;
int lineSize;
char *line;
char *words[3];
int wordCount;
struct patSpace *ps;
struct dnaSeq *ntSeq;

printf("<H1>Check Layout of %s</H1>\n", nt);
sprintf(ntFaName, "%s/p%s.fa", faDir, nt);
ntSeq = faReadAllDna(ntFaName);
ps = makePatSpace(&ntSeq, 1, oocFile, 10, 500);
sprintf(indexFileName, "%s/%s.index", indexDir, nt);
uglyf("Checking out %s and %s\n", indexFileName, ntFaName);
indexLf = lineFileOpen(indexFileName, TRUE);
while (lineFileNext(indexLf, &line, &lineSize))
    wordCount = chopLine(line, words);
    if (wordCount > 0)
	char bacFaName[512];
	struct dnaSeq *contigList, *contig;
	char *bacAcc = words[0];
	char *s = strrchr(bacAcc, '.');
	if (s != NULL)
	    *s = 0;
	uglyf("%s\n", bacAcc);
	sprintf(bacFaName, "%s/%s.fa", faDir, bacAcc);
	contigList = faReadAllDna(bacFaName);
	for (contig = contigList; contig != NULL; contig = contig->next)
	    boolean isRc;
	    uglyf(" %s\n", contig->name);
	    for (isRc = FALSE; isRc <= TRUE; isRc += 1)
		struct ssBundle *bunList, *bun;
		bunList = ssFindBundles(ps, contig, contig->name, ffTight);
		for (bun = bunList; bun != NULL; bun = bun->next)
		    showBundle(bun, isRc);
		reverseComplement(contig->dna, contig->size);
void writeBedTab(char *fileName, struct bedStub *bedList, int bedSize)
/* Write out bed list to tab-separated file. */
struct bedStub *bed;
FILE *f = mustOpen(fileName, "w");
char *words[64];
int i, wordCount;
for (bed = bedList; bed != NULL; bed = bed->next)
    if (!noBin)
        if (fprintf(f, "%u\t", hFindBin(bed->chromStart, bed->chromEnd)) <= 0)
    if (strictTab)
	wordCount = chopTabs(bed->line, words);
	wordCount = chopLine(bed->line, words);
    for (i=0; i<wordCount; ++i)
	/*	new definition for old "reserved" field, now itemRgb */
	/*	and when itemRgb, it is a comma separated string r,g,b */
	if (itemRgb && (i == 8))
	    char *comma;
	    /*  Allow comma separated list of rgb values here   */
	    comma = strchr(words[8], ',');
	    if (comma)
		int itemRgb = 0;
		if (-1 == (itemRgb = bedParseRgb(words[8])))
		    errAbort("ERROR: expecting r,g,b specification, "
				"found: '%s'", words[8]);
		    if (fprintf(f, "%d", itemRgb) <= 0)

		verbose(2, "itemRgb: %s, rgb: %#x\n", words[8], itemRgb);
		if (fputs(words[i], f) == EOF)
	    if (fputs(words[i], f) == EOF)

	if (i == wordCount-1)
	    if (fputc('\n', f) == EOF)
	    if (fputc('\t', f) == EOF)
void liftGl(char *destFile, struct hash *liftHash, int sourceCount, char *sources[]) 
/* Lift up coordinates in .gl file. */ 
char dirBuf[256], chromName[256];
int i; 
char *source; 
char *contig; 
FILE *dest = mustOpen(destFile, "w"); 
struct lineFile *lf = NULL;
int lineSize, wordCount;
char *line, *words[32];
struct liftSpec *spec;
int offset;

if (how == carryMissing)
    warn("'carry' doesn't work for .gl files, ignoring");
for (i=0; i<sourceCount; ++i)
    source = sources[i];
    verbose(1, "Processing %s\n", source);
    contig = contigInDir(source, dirBuf);
    verbose(2,"#\tcontig: %s, source: %s, dirBuf: %s\n", contig, source, dirBuf);
    if (!startsWith("ctg", contig) &&
	!startsWith("NC_", contig) &&
	!startsWith("NT_", contig) &&
	!startsWith("NG_", contig))
	sprintf(chromName, "chr%s", contig);
	contig = chromName;
    verbose(2,"#\tcontig: %s, chromName: %s\n", contig, chromName);
    spec = findLift(liftHash, contig, lf);
    if (spec == NULL)
    offset = spec->offset;
    lf = lineFileMayOpen(source, TRUE);
    if (lf == NULL)
	warn("%s doesn't exist, skipping", source);
    while (lineFileNext(lf, &line, &lineSize))
	int s, e;
	if ((wordCount = chopLine(line, words)) != 4)
	    errAbort("Bad line %d of %s", lf->lineIx, lf->fileName);
	s = atoi(words[1]);
	e = atoi(words[2]);
	fprintf(dest, "%s\t%d\t%d\t%s\n", words[0], s+offset, e+offset, words[3]);
    if (dots)
        verbose(1, "\n");
Esempio n. 26
void edwFixRevoked(char *database, char *inFile)
/* edwFixRevoked - Mark as deprecated files that are revoked in ENCODE2. */
/* inFile is in format:
 *    metaVariable objStatus revoked [- reason]
 *    metaObject name */
struct sqlConnection *conn = edwConnect();
struct lineFile *lf = lineFileOpen(inFile, TRUE);
char *line;
char *defaultReason = "Revoked in ENCODE2";
char *reason = defaultReason;
while (lineFileNextReal(lf, &line))
    if (startsWithWord("metaVariable", line))
	char *pattern = "metaVariable objStatus revoked";
	if (startsWithWord(pattern, line))
	    reason = skipLeadingSpaces(line + strlen(pattern));
	    if (isEmpty(reason))
	        reason = defaultReason;
		if (reason[0] == '-')
		   reason = skipLeadingSpaces(reason + 1);
		reason = cloneString(reason);
	    errAbort("??? %s\n", line);
    else if (startsWithWord("metaObject", line))
	char *row[3];
	int wordCount = chopLine(line, row);
	if (wordCount != 2)
	    errAbort("Strange metaobject line %d of %s\n", lf->lineIx, lf->fileName);
	char *prefix = row[1];
	if (!startsWith("wgEncode", prefix))
	    errAbort("Strange object line %d of %s\n", lf->lineIx, lf->fileName);
	char query[512];
	sqlSafef(query, sizeof(query), 
	    "select * from edwFile where submitFileName like '%s/%%/%s%%'", database, prefix);
	struct edwFile *ef, *efList = edwFileLoadByQuery(conn, query);
	printf("# %s %s\n", prefix, reason);
	for (ef = efList; ef != NULL; ef = ef->next)
	    long long id = ef->id;
	    printf("update edwFile set deprecated='%s' where id=%lld;\n", reason, id);
        errAbort("Unrecognized first word in %s\n", line);
Esempio n. 27
void addStageInfo(char *gsDir, struct hash *cloneHash)
/* Add info about which file and what stage clone is in. */
/* TSF - This is no longer used due to unavailability of *.finf files - 4/7/2003 */
    static char *finfFiles[] = {"ffa/finished.finf", "ffa/draft.finf",
                                "ffa/predraft.finf", "ffa/extras.finf"
    static char stages[] = "FDPD";
    struct lineFile *lf;
    char *line;
    char *words[7];
    int numStages = strlen(stages);
    int i;
    char pathName[512];
    char *finfFile, stage;
    int warnsLeft = maxWarn; /* Only show first maxWarn warnings about missing clones. */
    char cloneName[256];
    struct clonePos *clone;
    int wordCount, cloneCount;

    for (i=0; i<numStages; ++i)
        finfFile = finfFiles[i];
        stage = stages[i];
        sprintf(pathName, "%s/%s", gsDir, finfFile);
        printf("Processing %s\n", pathName);
        lf = lineFileOpen(pathName, TRUE);
        cloneCount = 0;
        while (lineFileNext(lf, &line, NULL))
            wordCount = chopLine(line, words);
            assert(wordCount == 7);
            strncpy(cloneName, words[1], sizeof(cloneName));
            if ((clone = hashFindVal(cloneHash, cloneName)) == NULL)
                if (warnsLeft > 0)
                    warn("%s is in %s but not in ooDir/*/*.gl", cloneName, pathName);
                else if (warnsLeft == 0)
                    warn("(Truncating additional warnings)");
            clone->stage[0] = stage;
        printf("Got %d clones in %s\n", cloneCount, pathName);
Esempio n. 28
static void agpToFa(char *agpFile, char *agpSeq, char *faOut, char *seqDir)
/* agpToFa - Convert a .agp file to a .fa file. */
struct lineFile *lf = lineFileOpen(agpFile, TRUE);
char *line, *words[16];
int lineSize, wordCount;
int lastPos = 0;
struct agpFrag *agpList = NULL, *agp;
FILE *f = mustOpen(faOut, "w");
char *prevChrom = NULL;

verbose(2,"#\tprocessing AGP file: %s\n", agpFile);
while (lineFileNext(lf, &line, &lineSize))
    if (line[0] == 0 || line[0] == '#' || line[0] == '\n')
    wordCount = chopLine(line, words);
    if (wordCount < 5)
        errAbort("Bad line %d of %s: need at least 5 words, got %d\n",
		 lf->lineIx, lf->fileName, wordCount);
    if (! (sameWord("all", agpSeq) || sameWord(words[0], agpSeq)))
    if (prevChrom != NULL && !sameString(prevChrom, words[0]))
	agpToFaOne(&agpList, agpFile, prevChrom, seqDir, lastPos, f);
	lastPos = 0;
    if (words[4][0] != 'N' && words[4][0] != 'U')
	lineFileExpectAtLeast(lf, 9, wordCount);
	agp = agpFragLoad(words);
	/* file is 1-based but agpFragLoad() now assumes 0-based: */
	agp->chromStart -= 1;
	agp->fragStart  -= 1;
	if (agp->chromStart != lastPos)
	    errAbort("Start doesn't match previous end line %d of %s\n",
		     lf->lineIx, lf->fileName);
	if (agp->chromEnd - agp->chromStart != agp->fragEnd - agp->fragStart)
	    errAbort("Sizes don't match in %s and %s line %d of %s\n",
		     agp->chrom, agp->frag, lf->lineIx, lf->fileName);
	slAddHead(&agpList, agp);
	lastPos = agp->chromEnd;
	lastPos = lineFileNeedNum(lf, words, 2);
    if (prevChrom == NULL || !sameString(prevChrom, words[0]))
	prevChrom = cloneString(words[0]);
agpToFaOne(&agpList, agpFile, prevChrom, seqDir, lastPos, f);
Esempio n. 29
static void parseDatabaseLines(struct blastFile *bf, char *line, struct blastQuery *bq)
/* Process something like:
 * Database: chr22.fa 
 *        977 sequences; 95,550,797 total letters
static struct dyString *tmpBuf = NULL;
char *words[16];
int wordCount;
if (bq->database != NULL)
    bfError(bf, "already parse Database:");

if (tmpBuf == NULL)
    tmpBuf = dyStringNew(512);

/* parse something like
 * Database: celegans98
 * some versions of blastp include the absolute path, but
 * then split it across lines.
wordCount = chopLine(line, words);
if (wordCount < 2)
    bfError(bf, "Expecting database name");
dyStringAppend(tmpBuf, words[1]);
while (line = bfNeedNextLine(bf), !isspace(line[0]))
    dyStringAppend(tmpBuf, line);
bq->database = cloneString(tmpBuf->string);

/* Process something like:
 *        977 sequences; 95,550,797 total letters
wordCount = chopLine(line, words);
if (wordCount < 3 || !isdigit(words[0][0]) || !isdigit(words[2][0]))
    bfError(bf, "Expecting database info");
bq->dbSeqCount = atoi(words[0]);
bq->dbBaseCount = atoi(words[2]);
Esempio n. 30
void viewWaba(char *wabName)
/* Show human readable waba alignment. */
struct lineFile *lf = lineFileOpen(wabName, TRUE);
int lineSize;
char *line;
char *qSym;
char *tSym;
char *hSym;
int symCount;
int wordCount, partCount;
char *words[16], *parts[4];
int qStart, qEnd, tStart, tEnd;
char strand;

while (lineFileNext(lf, &line, &lineSize))
    printf("%s\n", line);
    wordCount = chopLine(line, words);
    if (wordCount != 10)
        errAbort("Funny info line %d of %s\n", lf->lineIx, lf->fileName);
    partCount = chopString(words[6], ":-", parts, ArraySize(parts));
    if (partCount != 3)
        errAbort("Bad query range line %d of %s\n", lf->lineIx, lf->fileName);
    qStart = atoi(parts[1]);
    qEnd = atoi(parts[2]);
    strand = words[7][0];
    partCount = chopString(words[8], ":-", parts, ArraySize(parts));
    if (partCount != 3)
        errAbort("Bad target range line %d of %s\n", lf->lineIx, lf->fileName);
    tStart = atoi(parts[1]);
    tEnd = atoi(parts[2]);

    if (!lineFileNext(lf, &line, &lineSize))
        errAbort("Unexpected EOF.");
    symCount = strlen(line);
    qSym = cloneString(line);
    if (!lineFileNext(lf, &line, &lineSize))
        errAbort("Unexpected EOF.");
    tSym = cloneString(line);
    if (!lineFileNext(lf, &line, &lineSize))
        errAbort("Unexpected EOF.");
    hSym = cloneString(line);
    if (strand == '+')
	xenShowAli(qSym, tSym, hSym, symCount, stdout, qStart, tStart, '+', '+', 60);
	xenShowAli(qSym, tSym, hSym, symCount, stdout, qEnd, tStart, '-', '+', 60);