Ejemplo n.º 1
0
struct mapPos *readInfoFile(char *mapName)
/* Load the map file mapName into a list of mapPos records.
 * The first line of the file is consumed without being parsed and lines
 * starting with '#' are skipped.  Every other line is chopped into words,
 * checked with lineFileExpectWords(lf, 3, ...), and stored as clone name,
 * position and phase.  The list is returned in file order. */
{
struct mapPos *posList = NULL, *pos;
struct lineFile *lf = lineFileOpen(mapName, TRUE);
char *line;
char *fields[16];
int lineSize;
int fieldCount;

/* Consume the first line before looking for records. */
lineFileNeedNext(lf, &line, &lineSize);
for (;;)
    {
    if (!lineFileNext(lf, &line, &lineSize))
        break;
    if (line[0] != '#')
        {
        fieldCount = chopLine(line, fields);
        lineFileExpectWords(lf, 3, fieldCount);
        AllocVar(pos);
        pos->cloneName = cloneString(fields[0]);
        pos->pos = atoi(fields[1]);
        pos->phase = atoi(fields[2]);
        slAddHead(&posList, pos);
        }
    }
lineFileClose(&lf);
/* Records were pushed on the head, so flip back to file order. */
slReverse(&posList);
return posList;
}
Ejemplo n.º 2
0
static void parseColumnHeaderRow(struct vcfFile *vcff, char *line)
/* Make sure column names are as we expect, and store genotype sample IDs if any are given.
 * line is chopped in place.  If it does not start with '#', an error is reported and the
 * line is handed back to vcff->lf.  A header with fewer than the eight fixed columns is
 * reported as an error and not examined further. */
{
if (line[0] != '#')
    {
    vcfFileErr(vcff, "Expected to find # followed by column names (\"#CHROM POS ...\"), "
               "not \"%s\"", line);
    lineFileReuse(vcff->lf);
    return;
    }
char *words[VCF_MAX_COLUMNS];
int wordCount = chopLine(line+1, words);
if (wordCount >= VCF_MAX_COLUMNS)
    vcfFileErr(vcff, "header contains at least %d columns; "
               "VCF_MAX_COLUMNS may need to be increased in vcf.c!", VCF_MAX_COLUMNS);
if (wordCount < 8)
    {
    /* words[] is only filled in up to wordCount, so checking the eight fixed
     * columns below would read uninitialized pointers. */
    vcfFileErr(vcff, "Expected at least 8 column names after #, found %d", wordCount);
    return;
    }
expectColumnName(vcff, "CHROM", words, 0);
expectColumnName(vcff, "POS", words, 1);
expectColumnName(vcff, "ID", words, 2);
expectColumnName(vcff, "REF", words, 3);
expectColumnName(vcff, "ALT", words, 4);
expectColumnName2(vcff, "QUAL", "PROB", words, 5);
expectColumnName(vcff, "FILTER", words, 6);
expectColumnName(vcff, "INFO", words, 7);
if (wordCount > 8)
    {
    expectColumnName(vcff, "FORMAT", words, 8);
    if (wordCount < 10)
        vcfFileErr(vcff, "FORMAT column is given, but no sample IDs for genotype columns...?");
    /* Everything after the FORMAT column is a genotype sample ID. */
    vcff->genotypeCount = (wordCount - 9);
    vcff->genotypeIds = vcfFileAlloc(vcff, vcff->genotypeCount * sizeof(char *));
    int i;
    for (i = 9;  i < wordCount;  i++)
        vcff->genotypeIds[i-9] = vcfFileCloneStr(vcff, words[i]);
    }
}
Ejemplo n.º 3
0
struct blastFile *blastFileOpenVerify(char *fileName)
/* Open fileName as a blast file and check its first line.
 * The first three words of that line are stored as program, version and
 * build date.  bfBadHeader is called if there are fewer than three words,
 * the program does not match "*BLAST*", the version does not start with a
 * digit, or the build date does not start with '['. */
{
struct blastFile *bf;
char *headerWords[16];
char *firstLine;
int headerWordCount;

AllocVar(bf);
bf->lf = lineFileOpen(fileName, TRUE);
bf->fileName = cloneString(fileName);

/* Split the header line into its leading words. */
firstLine = bfNeedNextLine(bf);
headerWordCount = chopLine(firstLine, headerWords);
if (headerWordCount < 3)
    bfBadHeader(bf);
bf->program = cloneString(headerWords[0]);
bf->version = cloneString(headerWords[1]);
bf->buildDate = cloneString(headerWords[2]);

/* Sanity check each of the three header fields in turn. */
if (!wildMatch("*BLAST*", bf->program))
    bfBadHeader(bf);
if (!isdigit(bf->version[0]))
    bfBadHeader(bf);
if (bf->buildDate[0] != '[')
    bfBadHeader(bf);
return bf;
}
Ejemplo n.º 4
0
static struct gfRange *gfQuerySeq(int conn, struct dnaSeq *seq)
/* Ask server for places sequence hits. */
{
struct gfRange *rangeList = NULL, *range;
char buf[256], *row[6];
int rowSize;

startSeqQuery(conn, seq, "query");

/* Read results line by line and save in list, and return. */
for (;;)
    {
    netRecieveString(conn, buf);
    if (sameString(buf, "end"))
	{
	break;
	}
    else if (startsWith("Error:", buf))
        {
	gfServerWarn(seq, buf);
	break;
	}
    else
	{
	rowSize = chopLine(buf, row);
	if (rowSize < 6)
	    errAbort("Expecting 6 words from server got %d", rowSize);
	range = gfRangeLoad(row);
	slAddHead(&rangeList, range);
	}
    }
slReverse(&rangeList);
return rangeList;
}
Ejemplo n.º 5
0
boolean wormGeneForOrf(char *orfName, char *geneNameBuf, int bufSize)
/* Look for gene type (unc-12 or something) synonym for cosmid.N name.
 * Scans the orf2gene file under wormFeaturesDir() for the first line that
 * starts with orfName followed by a space and has a second word.  That word
 * is copied into geneNameBuf (bufSize bytes, always NUL-terminated) and TRUE
 * is returned.  Returns FALSE if no such line exists. */
{
FILE *f;
char fileName[512];
char lineBuf[512];
int nameLen = strlen(orfName);
boolean ok = FALSE;

/* Bounded format so an overlong directory name cannot overrun fileName. */
snprintf(fileName, sizeof(fileName), "%sorf2gene", wormFeaturesDir());
f = mustOpen(fileName, "r");
while (fgets(lineBuf, sizeof(lineBuf), f))
    {
    if (strncmp(lineBuf, orfName, nameLen) == 0 && lineBuf[nameLen] == ' ')
        {
        char *words[2];
        int wordCount;
        wordCount = chopLine(lineBuf, words);
        /* words[1] is only set when the line really has a second word. */
        if (wordCount < 2)
            continue;
        assert((int)strlen(words[1]) < bufSize);
        strncpy(geneNameBuf, words[1], bufSize);
        /* strncpy does not terminate when the name fills the buffer; with
         * asserts compiled out that case would otherwise slip through. */
        if (bufSize > 0)
            geneNameBuf[bufSize-1] = 0;
        ok = TRUE;
        break;
        }
    }
fclose(f);
return ok;
}
int findBedSize(char *fileName, struct lineFile **retLf)
/* Read first line of file and figure out how many words in it. */
/* Input file could be stdin, in which case we really don't want to open,
 * read, and close it here.  So if retLf is non-NULL, return the open
 * linefile (having told it to reuse the line we just read).
 * If the file has no real line, returns 0 when ignoreEmpty is set and
 * aborts otherwise.  The line file is always either handed back through
 * retLf or closed. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *words[64], *line;
int wordCount = 0;

if (lineFileNextReal(lf, &line))
    {
    /* Chop a private copy so the line handed back for reuse stays intact. */
    line = cloneString(line);
    if (strictTab)
        wordCount = chopTabs(line, words);
    else
        wordCount = chopLine(line, words);
    freeMem(line);
    if (wordCount == 0)
        errAbort("%s appears to be empty", fileName);
    if (retLf != NULL)
        lineFileReuse(lf);
    }
else if (!ignoreEmpty)
    {
    /* Nothing was read, so there is no line to measure. */
    errAbort("%s appears to be empty", fileName);
    }

if (retLf != NULL)
    *retLf = lf;
else
    lineFileClose(&lf);
return wordCount;
}
Ejemplo n.º 7
0
struct lump *readLumps(char *fileName)
/* Read in lumps from file.
 * A line that does not start with white space begins a new lump whose seq is
 * the first word.  A line that starts with white space must have exactly two
 * words, the first starting with a digit; that number is added to the count
 * of the most recent lump.  Blank lines are skipped.  Returns the lumps in
 * file order. */
{
struct lump *lumpList = NULL, *lump = NULL;
char line[1024];
int lineCount = 0;
char *words[3];
int wordCount;
boolean isIndented;
FILE *f = mustOpen(fileName, "r");

while (fgets(line, sizeof(line), f))
    {
    ++lineCount;
    isIndented = isspace(line[0]);
    wordCount = chopLine(line, words);
    if (wordCount == 0)
        continue;   /* Allow blank lines. */
    if (isIndented)
        {
        /* A count line needs a lump to belong to; lump is still NULL if the
         * file starts with an indented line. */
        if (lump == NULL || wordCount != 2 || !isdigit(words[0][0]))
            errAbort("Bad line %d of %s\n", lineCount, fileName);
        lump->count += atoi(words[0]);
        }
    else
        {
        AllocVar(lump);
        lump->seq = cloneString(words[0]);
        slAddHead(&lumpList, lump);
        }
    }
fclose(f);
slReverse(&lumpList);
return lumpList;
}
Ejemplo n.º 8
0
struct pgo *readC2g(char *fileName)
/* Read a C2g file into memory.
 * Each non-blank line must have three words: a chromosome range parsed by
 * wormParseChromRange, a strand (first character of the second word) and a
 * gene name.  Aborts on malformed lines.  Returns the list in file order. */
{
FILE *f = mustOpen(fileName, "r");
struct pgo *list = NULL, *el;
char lineBuf[128];
char *words[4];
int wordCount;
int lineCount = 0;

while (fgets(lineBuf, sizeof(lineBuf), f) != NULL)
    {
    ++lineCount;
    wordCount = chopLine(lineBuf, words);
    if (wordCount == 0)
        continue;   /* Ignore blank lines. */
    if (wordCount != 3)
        {
        errAbort("Strange line starting with %s line %d of %s",
            words[0], lineCount, fileName);
        }
    AllocVar(el);
    if (!wormParseChromRange(words[0], &el->chrom, &el->start, &el->end))
        errAbort("Bad chromosome range line %d of %s", lineCount, fileName);
    el->strand = words[1][0];
    el->gene = cloneString(words[2]);
    slAddHead(&list, el);
    }
/* The file handle was previously leaked. */
fclose(f);
slReverse(&list);
return list;
}
Ejemplo n.º 9
0
char *findEnsTrans(struct lineFile *lf, char *line)
/* Extract the transcript name from an Ensembl line, skipping its first
 * character.  The line is modified in place and the returned pointer points
 * into it.  Two layouts are tried:
 *   1) a white-space separated word starting with "Translation:", in which
 *      case the text after the key is returned;
 *   2) '|' separated fields, in which case the third field is returned with
 *      anything from its first '.' cut off.
 * Aborts, naming the line and file from lf, if neither layout matches. */
{
char *key = "Translation:";
char *fields[32];
int keyLen = strlen(key);
int fieldCount = chopLine(line+1, fields);
int ix = 0;
char *dot;

while (ix < fieldCount)
    {
    char *field = fields[ix++];
    if (startsWith(key, field))
        return field + keyLen;
    }

/* No key found: fall back to the '|' separated layout. */
fieldCount = chopString(line+1, "|", fields, ArraySize(fields));
if (fieldCount < 3)
    {
    errAbort("Couldn't find '%s' key for transcript name line %d of %s",
        key, lf->lineIx, lf->fileName);
    return NULL;
    }
dot = strchr(fields[2], '.');
if (dot != NULL)
    *dot = 0;
return fields[2];
}
int main(int argc, char *argv[])
/* Read an "ls -l" style listing (argv[1]) and group the listed names into
 * jobs.  Names are collected until adding the next file would push the
 * accumulated size past chunkSize, at which point the collected names are
 * handed to finishJob.  argv[2] and argv[3] are passed on to writeInLists as
 * dirName and outDir. */
{
char *inName, *name;
int chunkSize = 4048*1024;
FILE *in;
int accSize = 0;
int newAccSize;
int oneSize;
char line[512];
int lineCount = 0;   /* Was uninitialized, making reported line numbers garbage. */
char *words[16];
int wordCount;
struct slName *bacs = NULL, *bn;
char *dirName;
char *outDir;

if (argc != 4)
    usage();
inName = argv[1];
dirName = argv[2];
outDir = argv[3];


in = mustOpen(inName, "r");

while (fgets(line, sizeof(line), in))
    {
    char *sizeString;
    ++lineCount;
    wordCount = chopLine(line, words);
    if (wordCount == 0)
        continue;
    if (wordCount != 9)
        errAbort("Line %d of %s doesn't look like an ls -l line", lineCount, inName);
    /* Word 4 is used as the size and word 8 as the name. */
    sizeString = words[4];
    if (!isdigit(sizeString[0]))
        errAbort("Line %d of %s doesn't look like an ls - l line", lineCount, inName);
    name = words[8];
    oneSize = atoi(sizeString);
    newAccSize = accSize + oneSize;
    if (newAccSize > chunkSize)
        {
        /* Current job is full: flush it and start a new one with this file. */
        finishJob(&bacs, accSize);
        accSize = oneSize;
        if (oneSize > chunkSize)
            warn("Size %d of %s exceed chunk size %d", oneSize, name, chunkSize);
        }
    else
        {
        accSize = newAccSize;
        }
    bn = newSlName(name);
    slAddHead(&bacs, bn);
    }
fclose(in);
if (bacs != NULL)
    finishJob(&bacs, accSize);
printf("%d total jobs\n", jobCount);
writeInLists(outDir, dirName);
//writeJobs("job", "in", startMachine, stopMachine, "cc");
return 0;
}
struct consWiggle *wigMafWiggles(char *db, struct trackDb *tdb)
/* get conservation wiggle table names and labels from trackDb setting,
   ignoring those where table doesn't exist.
   The CONS_WIGGLE setting is read as white-space separated groups of three
   words: table, left label, UI label.  A missing left label falls back to
   DEFAULT_CONS_LABEL and a missing UI label falls back to the left label.
   Returns NULL if the setting is absent. */
{
char *fields[20];
int fieldCt;
int i;
char *wigTable, *wigLabel;
struct consWiggle *wig, *wigList = NULL;
char *setting = trackDbSetting(tdb, CONS_WIGGLE);
if (!setting)
    return NULL;
fieldCt = chopLine(cloneString(setting), fields);
for (i = 0; i < fieldCt; i += 3)
    {
    wigTable = fields[i];
    /* A stray ';' after this condition used to make the block below run for
     * every table, existing or not. */
    if (hTableExists(db, wigTable))
        {
        AllocVar(wig);
        wig->table = cloneString(wigTable);
        wigLabel = (i+1 == fieldCt ? DEFAULT_CONS_LABEL : fields[i+1]);
        wig->leftLabel = cloneString(wigLabel);
        wigLabel = (i+2 >= fieldCt ? wig->leftLabel : fields[i+2]);
        wig->uiLabel = cloneString(wigLabel);
        slAddTail(&wigList, wig);
        }
    }
return wigList;
}
void getSizes(char *fileName, int *retU, int *retN)
/* Total up the sizes of the lines in a gold file.
 * Each line must have at least 8 words.  The size of a line is word 2 minus
 * (word 1 - 1).  Lines whose word 4 starts with 'N' or 'U' are summed into
 * *retN, all other lines into *retU. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line, *fields[16];
int lineSize, fieldCount;
int totalU = 0, totalN = 0;

while (lineFileNext(lf, &line, &lineSize))
    {
    int start, span;
    char type;

    fieldCount = chopLine(line, fields);
    if (fieldCount < 8)
        errAbort("Short line %d of %s\n", lf->lineIx, lf->fileName);
    start = atoi(fields[1]) - 1;
    span = atoi(fields[2]) - start;
    type = fields[4][0];
    if (type != 'N' && type != 'U')
        totalU += span;
    else
        totalN += span;
    }
lineFileClose(&lf);
*retN = totalN;
*retU = totalU;
}
Ejemplo n.º 13
0
static void getXrefInfo(struct sqlConnection *conn,
                        char **retXrefTable, char **retIdField,
                        char **retAliasField)
/* See if curTrack specifies an xref/alias table for lookup of IDs.
 * Reads the "idXref" setting of curTrack, which must hold three words
 * (xrefTable, idField, aliasField), and aborts if it has fewer.  The three
 * names are returned through the non-NULL ret pointers; all three are NULL
 * when there is no setting or when the table or either field is not found
 * through conn. */
{
char *xrefSpec = curTrack ? trackDbSetting(curTrack, "idXref") : NULL;
char *xrefTable = NULL, *idField = NULL, *aliasField = NULL;
if (xrefSpec != NULL)
    {
    char *words[3];
    /* words[] is only filled in up to the returned count, so the count has
     * to be checked before words[2] is looked at. */
    int wordCount = chopLine(cloneString(xrefSpec), words);
    if (wordCount < 3 || isEmpty(words[2]))
        errAbort("trackDb error: track %s, setting idXref must be followed "
                 "by three words (xrefTable, idField, aliasField).",
                 curTrack->track);
    xrefTable = words[0];
    idField = words[1];
    aliasField = words[2];
    if (!sqlTableExists(conn, xrefTable) ||
        sqlFieldIndex(conn, xrefTable, idField) < 0 ||
        sqlFieldIndex(conn, xrefTable, aliasField) < 0)
        xrefTable = idField = aliasField = NULL;
    }
if (retXrefTable != NULL)
    *retXrefTable = xrefTable;
if (retIdField != NULL)
    *retIdField = idField;
if (retAliasField != NULL)
    *retAliasField = aliasField;
}
Ejemplo n.º 14
0
void rt1dFind(char *tabFile, char *treeFile, char *chrom, bits32 start, bits32 end)
/* rt1dFind - find items in 1-D range tree.
 * Asks the tree in treeFile for the blocks of tabFile overlapping
 * chrom:start-end, then reads each block line by line.  Every line must chop
 * into exactly three words (chrom, start, end); lines on chrom that intersect
 * the requested range are printed to stdout. */
{
struct lineFile *lf = lineFileOpen(tabFile, TRUE);
struct crTreeFile *crf = crTreeFileOpen(treeFile);
struct fileOffsetSize *block, *blockList = crTreeFindOverlappingBlocks(crf, chrom, start, end);
verbose(2, "Got %d overlapping blocks\n", slCount(blockList));
for (block = blockList; block != NULL; block = block->next)
    {
    verbose(2, "block->offset %llu, block->size %llu\n", block->offset, block->size);
    lineFileSeek(lf, block->offset, SEEK_SET);
    bits64 sizeUsed = 0;
    while (sizeUsed < block->size)
        {
        char *line;
        int size;
        if (!lineFileNext(lf, &line, &size))
            errAbort("Couldn't read %s\n", lf->fileName);
        /* Chop a copy so the original line can still be printed whole. */
        char *parsedLine = cloneString(line);
        char *row[3];
        if (chopLine(parsedLine, row) != ArraySize(row))
            errAbort("Badly formatted line of %s\n%s", lf->fileName, line);
        char *bedChrom = row[0];
        bits32 bedStart = sqlUnsigned(row[1]);
        bits32 bedEnd = sqlUnsigned(row[2]);
        if (sameString(bedChrom, chrom) && rangeIntersection(bedStart, bedEnd, start, end) > 0)
            fprintf(stdout, "%s\n", line);
        freeMem(parsedLine);
        sizeUsed += size;
        }
    }
crTreeFileClose(&crf);
/* The tab file was previously left open. */
lineFileClose(&lf);
}
Ejemplo n.º 15
0
int main(int argc, char *argv[])
/* Copy lines from stdin to stdout, keeping only the first line seen for each
 * distinct value of word number wordIx (1-based, given as argv[1]).  Blank
 * lines, lines whose first word starts with '#', and lines with fewer than
 * wordIx words are dropped. */
{
FILE *in = stdin;
FILE *out = stdout;
char origLine[1024];
char line[1024];
char *words[256];
int wordCount;
struct hash *hash;
int wordIx;
char *word;

if (argc != 2 || !isdigit(argv[1][0]))
    errAbort("Usage: %s wordIx", argv[0]);

wordIx = atoi(argv[1]);
/* wordIx is 1-based; 0 would index words[-1] below. */
if (wordIx < 1)
    errAbort("Usage: %s wordIx", argv[0]);
hash = newHash(14);
while (fgets(line, sizeof(line), in))
    {
    /* chopLine cuts line up in place, so keep a pristine copy for output. */
    strcpy(origLine, line);
    wordCount = chopLine(line, words);
    if (wordCount < 1 || words[0][0] == '#')
        continue;
    if (wordCount >= wordIx)
        {
        word = words[wordIx-1];
        if (!hashLookup(hash, word))
            {
            fprintf(out, "%s", origLine);
            hashAdd(hash, word, NULL);
            }
        }
    }
return 0;
}
// Reads the animation motion data section from an already opened file.
// The first line read through chopLine() is parsed as the number of lines in
// the block and stored in animationMotionDataLines. SkyrimAnimationMotionData
// entries are then appended and read until lineCount reaches that number.
// Returns false if the file is null or not open, the count cannot be read or
// parsed, the file ends right after the count, or any entry fails to read.
bool ProjectAnimData::readMotionOnly(QFile *file){
    if (file == nullptr || !file->isOpen()){
        return false;
    }
    QByteArray line;
    auto lineCount = 0UL;
    if (!chopLine(file, line, lineCount)){
        return false;
    }
    auto ok = false;
    auto blocksize = line.toULong(&ok);
    if (!ok){
        return false;
    }
    animationMotionDataLines = blocksize;
    if (file->atEnd()){
        return false;
    }
    // Each read() is passed lineCount; the loop ends once it reaches the block size.
    lineCount = 0;
    while (lineCount < blocksize){
        animationMotionData.append(new SkyrimAnimationMotionData(this));
        if (!animationMotionData.last()->read(file, lineCount)){
            return false;
        }
    }
    return true;
}
Ejemplo n.º 17
0
struct hash *makePairHash(char *pairFile)
/* Make up a hash table out of paired ESTs.
 * Each non-blank line of pairFile must have exactly two words, the 5' and 3'
 * names.  One estPair is allocated per line, added to the hash under both
 * names and pushed onto the global estPairList.  The pair's name5/name3
 * point at the names stored in the hash elements. */
{
FILE *f = mustOpen(pairFile, "r");
char line[256];
char *words[3];
int wordCount;
int lineCount = 0;
struct hash *hash;
struct hashEl *h5, *h3;
struct estPair *ep;
char *name5, *name3;

hash = newHash(19);
while (fgets(line, sizeof(line), f))
    {
    ++lineCount;
    wordCount = chopLine(line, words);
    if (wordCount == 0)
        continue;
    if (wordCount != 2)
        errAbort("%d words in pair line %d of %s", wordCount, lineCount, pairFile);
    name5 = words[0];
    name3 = words[1];
    AllocVar(ep);
    h5 = hashAdd(hash, name5, ep);
    h3 = hashAdd(hash, name3, ep);
    /* words[] point into the line buffer, which is overwritten by the next
     * fgets, so keep the names held by the hash elements instead. */
    ep->name5 = h5->name;
    ep->name3 = h3->name;
    slAddHead(&estPairList, ep);
    }
/* The file handle was previously leaked. */
fclose(f);
printf("Read %d lines of pair info\n", lineCount);
return hash;
}
struct psl *nextPsl(struct lineFile *lf)
/* Fetch the next line of lf and load it as a psl.
 * Returns NULL at end of file.  Aborts unless the line chops into exactly
 * 21 words. */
{
char *fields[32];
char *line;
int lineSize;
int fieldCount;

if (!lineFileNext(lf, &line, &lineSize))
    return NULL;
fieldCount = chopLine(line, fields);
if (fieldCount != 21)
    {
    errAbort("Bad line %d of %s, %d words expecting %d", lf->lineIx, lf->fileName, fieldCount, 21);
    return NULL;
    }
return pslLoad(fields);
}
Ejemplo n.º 19
0
static boolean parseBlockLine(struct blastFile *bf, int *startRet, int *endRet,
                           struct dyString *seq)
/* read and parse the next target or query line, like:
 *   Query: 26429 taccttgacattcctcagtgtgtcatcatcgttctctcctccaaacggcgagagtccgga 26488
 *
 * also handle broken NCBI tblastn output like:
 *   Sbjct: 1181YYGEQRSTNGQTIQLKTQVFRRFPDDDDESEDHDDPDNAHESPEQEGAEGHFDLHYYENQ 1360
 *
 * Ignores and returns FALSE on bogus records generated by PSI BLAST, such as
 *   Query: 0   --------------------------
 *   Sbjct: 38  PPGPPGVAGGNQTTVVVIYGPPGPPG                                   63
 *   Query: 0
 *   Sbjct: 63                                                               63
 * If FALSE is returned, the output parameters will be unchanged.
 *
 * On success the sequence text is appended to seq, *startRet is lowered to
 * the smaller of the two coordinates and *endRet raised to the larger one
 * where they extend the current values.
 */
{
char* line = bfNeedNextLine(bf);
int a, b, s, e;
char *words[16];
int wordCount = chopLine(line, words);
if ((wordCount < 2) || (wordCount > 4) || !(sameString("Query:", words[0]) || sameString("Sbjct:", words[0])))
    bfSyntax(bf);

/* look for one of the bad formats to ignore, as described above */
if (((wordCount == 2) && isAllDigits(words[1]))
    || ((wordCount == 3) && isAllDigits(words[1]) && isAllDigits(words[2]))
    || ((wordCount == 3) && isAllDigits(words[1]) && isAllDashes(words[2])))
    {
    bfWarn(bf, "Ignored invalid alignment format for aligned sequence pair");
    return FALSE;
    }

/* special handling for broken output with no space between start and
 * sequence */
if (wordCount == 3)
    {
    char *p;
    if (!isdigit(words[1][0]) || !isdigit(words[2][0]))
        bfSyntax(bf);
    a = atoi(words[1]);
    b = atoi(words[2]);
    /* The sequence starts right after the leading digits of words[1]. */
    p = words[1];
    while ((*p != '\0') && (isdigit(*p)))
        p++;
    dyStringAppend(seq, p);
    }
else
    {
    /* A two-word line that is not all digits (e.g. "Query: 12AB") also lands
     * here; words[2] and words[3] are only set when there are four words, so
     * the count is checked first. */
    if ((wordCount != 4) || !isdigit(words[1][0]) || !isdigit(words[3][0]))
        bfSyntax(bf);
    a = atoi(words[1]);
    b = atoi(words[3]);
    dyStringAppend(seq, words[2]);
    }
s = min(a,b);
e = max(a,b);
*startRet = min(s, *startRet);
*endRet = max(e, *endRet);
return TRUE;
}
Ejemplo n.º 20
0
void addCtgFile(char *liftFileName, struct ctgPos **pCtgList)
/* Create ctgPos's out of liftSpecs in liftFile. */
{
struct lineFile *lf = lineFileOpen(liftFileName, TRUE);
int lineSize, wordCount;
char *line, *words[16];
struct liftSpec lift;
struct ctgPos *ctg;

printf("Processing %s\n", liftFileName);
while (lineFileNext(lf, &line, &lineSize))
    {
    wordCount = chopLine(line, words);
    if (wordCount == 0)
        continue;
    if (wordCount != 5)
        errAbort("Expecting 5 words line %d of %s", lf->lineIx, lf->fileName);
    liftSpecStaticLoad(words, &lift);
    AllocVar(ctg);
    ctg->contig = cloneString(skipPastSlash(lift.oldName));
    ctg->size = lift.oldSize;
    ctg->chrom = cloneString(lift.newName);
    ctg->chromStart = lift.offset;
    ctg->chromEnd = lift.offset + lift.oldSize;
    slAddHead(pCtgList, ctg);
    }
lineFileClose(&lf);
}
Ejemplo n.º 21
0
void checkInputOpenFiles(struct inInfo *array, int count)
/* Make sure all of the input is there and of right format before going forward. Since
 * this is going to take a while we want to fail fast.
 * bigWig inputs are opened and the handle kept in in->bbi.  BED inputs are opened
 * into in->lf, their first line is checked for enough fields and loaded once as a
 * trial, and the line is then pushed back so later reads start from the top. */
{
int i;
for (i=0; i<count; ++i)
    {
    struct inInfo *in = &array[i];
    switch (in->type)
        {
        case itBigWig:
            {
            /* Just open, it will abort if any problem. */
            in->bbi = bigWigFileOpen(in->fileName);
            break;
            }
        case itPromoterBed:
        case itUnstrandedBed:
        case itBlockedBed:
            {
            struct lineFile *lf = in->lf = lineFileOpen(in->fileName, TRUE);
            char *line;
            lineFileNeedNext(lf, &line, NULL);
            /* Chop a copy so the line handed back by lineFileReuse is intact. */
            char *dupe = cloneString(line);
            char *row[256];
            int wordCount = chopLine(dupe, row);
            struct bed *bed = NULL;
            switch (in->type)
                {
                case itPromoterBed:
                    lineFileExpectAtLeast(lf, 6, wordCount);
                    bed = bedLoadN(row, 6);
                    char strand = bed->strand[0];
                    /* Strand is the sixth field, row[5]; row[6] may not even exist. */
                    if (strand != '+' && strand != '-')
                        errAbort("%s must be stranded, got %s in that field", lf->fileName, row[5]);
                    break;
                case itUnstrandedBed:
                    lineFileExpectAtLeast(lf, 4, wordCount);
                    bed = bedLoadN(row, 4);
                    break;
                case itBlockedBed:
                    /* Twelve fields are loaded, so twelve must be present. */
                    lineFileExpectAtLeast(lf, 12, wordCount);
                    bed = bedLoadN(row, 12);
                    break;
                default:
                    internalErr();
                    break;
                }
            bedFree(&bed);
            freez(&dupe);
            lineFileReuse(lf);
            break;
            }
        default:
            internalErr();
            break;
        }
    }
}
Ejemplo n.º 22
0
struct cmChrom *wuParse(char *inName, struct hash *cloneHash,
        struct cloneInfo **pCloneList, struct hash *ctgHash)
/* Parse wash U style clone map into common
 * intermediate format.
 * Only lines starting with "start human SUPERLINK" are handled here; they
 * must have 4 or 5 words with the last one starting with '*'.  The chromosome
 * name is whatever follows the last '.' of the third word, and the fourth
 * word says whether the contigs are ORDERED.  The contigs themselves are read
 * by readNa or readContigList.  Returns the chromosomes in file order. */
{
struct lineFile *in = lineFileOpen(inName, TRUE);
int lineSize;
char *line;
char *words[16];
int wordCount;
char chromName[32];
char lastChromName[32];
boolean isOrdered;
char *s;
struct cmChrom *chromList = NULL, *chrom = NULL;
struct hash *ntHash = newHash(0);

strcpy(lastChromName, "");
while (lineFileNext(in, &line, &lineSize))
    {
    struct cmContig **pContigList;

    if (line[0] == '#') continue;
    if (!startsWith("start human SUPERLINK", line))
        continue;
    wordCount = chopLine(line, words);
    if (wordCount != 5 && wordCount != 4)
        errAbort("Odd start line %d of %s\n", in->lineIx, in->fileName);
    if (words[wordCount-1][0] != '*')
        errAbort("Odd start line %d of %s\n", in->lineIx, in->fileName);
    s = strrchr(words[2], '.');
    if (s == NULL)
        errAbort("Couldn't find chromosome line %d of %s\n", in->lineIx, in->fileName);
    s += 1;
    strncpy(chromName, s, sizeof(chromName));
    /* strncpy does not terminate when the source fills the buffer. */
    chromName[sizeof(chromName)-1] = 0;
    /* Also start a chromosome when none exists yet, so an empty name on the
     * first start line cannot leave chrom NULL below. */
    if (chrom == NULL || !sameString(chromName, lastChromName))
        {
        strcpy(lastChromName, chromName);
        printf("Reading %s\n", chromName);
        AllocVar(chrom);
        chrom->name = cloneString(chromName);
        slAddHead(&chromList, chrom);
        }
    isOrdered = sameWord(words[3], "ORDERED");
    pContigList = (isOrdered ? &chrom->orderedList : &chrom->randomList);
    if (*pContigList != NULL)
        errAbort("Duplicate chromosome %s %s",
                chromName, words[3]);
    if (isFinChrom(chromName))
        continue;
    if (sameString(chromName, "NA"))
        readNa(in, chrom, pContigList, cloneHash, pCloneList, ctgHash);
    else
        readContigList(in, chrom, pContigList, sameString(chromName, "COMMIT"),
                !isOrdered, cloneHash, pCloneList, ctgHash, ntHash);
    }
/* The input file was previously left open. */
lineFileClose(&in);
slReverse(&chromList);
return chromList;
}
Ejemplo n.º 23
0
void alignNt(char *nt)
/* Align draft bacs against the NT named nt and print the results as HTML.
 * The NT sequence is read from faDir/p<nt>.fa and indexed into a pattern
 * space.  Each non-blank line of indexDir/<nt>.index names a bac in its first
 * word (anything from the last '.' on is cut off); every contig of that bac
 * is searched with ssFindBundles, reverse-complemented, and searched again,
 * and each bundle found is passed to showBundle. */
{
char ntFaName[512];
char indexFileName[512];
struct dnaSeq *ntSeq;
struct patSpace *ps;
struct lineFile *indexLf;
char *line;
char *words[3];
int lineSize;

printf("<H1>Check Layout of %s</H1>\n", nt);
printf("<PRE>");
sprintf(ntFaName, "%s/p%s.fa", faDir, nt);
ntSeq = faReadAllDna(ntFaName);
ps = makePatSpace(&ntSeq, 1, oocFile, 10, 500);
sprintf(indexFileName, "%s/%s.index", indexDir, nt);
uglyf("Checking out %s and %s\n", indexFileName, ntFaName);
indexLf = lineFileOpen(indexFileName, TRUE);
while (lineFileNext(indexLf, &line, &lineSize))
    {
    char bacFaName[512];
    char *bacAcc, *dot;
    struct dnaSeq *contigList, *contig;

    if (chopLine(line, words) <= 0)
        continue;
    bacAcc = words[0];
    dot = strrchr(bacAcc, '.');
    if (dot != NULL)
        *dot = 0;
    uglyf("%s\n", bacAcc);
    sprintf(bacFaName, "%s/%s.fa", faDir, bacAcc);
    contigList = faReadAllDna(bacFaName);
    for (contig = contigList; contig != NULL; contig = contig->next)
        {
        boolean rc;
        uglyf(" %s\n", contig->name);
        /* Two passes; the contig is reverse-complemented after each one. */
        for (rc = FALSE; rc <= TRUE; rc += 1)
            {
            struct ssBundle *bundles = ssFindBundles(ps, contig, contig->name, ffTight);
            struct ssBundle *bundle;
            for (bundle = bundles; bundle != NULL; bundle = bundle->next)
                showBundle(bundle, rc);
            ssBundleFreeList(&bundles);
            reverseComplement(contig->dna, contig->size);
            }
        }
    freeDnaSeqList(&contigList);
    }
lineFileClose(&indexLf);
freeDnaSeqList(&ntSeq);
}
void writeBedTab(char *fileName, struct bedStub *bedList, int bedSize)
/* Write every bed in bedList to fileName as a tab-separated line.
 * Unless noBin is set each line starts with the bin from hFindBin.  The
 * stored line of each bed is chopped in place (by tabs if strictTab, by white
 * space otherwise) and its words are written separated by tabs.  When the
 * global itemRgb is set and the ninth word contains a comma, it is converted
 * with bedParseRgb and written as a number.  Any failed write is reported
 * through writeFailed. */
{
FILE *f = mustOpen(fileName, "w");
struct bedStub *bed;
char *words[64];
int wordCount, i;

for (bed = bedList; bed != NULL; bed = bed->next)
    {
    if (!noBin && fprintf(f, "%u\t", hFindBin(bed->chromStart, bed->chromEnd)) <= 0)
        writeFailed(fileName);
    wordCount = strictTab ? chopTabs(bed->line, words) : chopLine(bed->line, words);
    for (i=0; i<wordCount; ++i)
        {
        char *word = words[i];
        char separator = (i == wordCount-1) ? '\n' : '\t';

        /* Word 8 is the old "reserved" field, now itemRgb, which may be
         * given as a comma separated r,g,b string. */
        if (itemRgb && i == 8 && strchr(word, ',') != NULL)
            {
            int rgbValue = bedParseRgb(word);
            if (rgbValue == -1)
                errAbort("ERROR: expecting r,g,b specification, "
                            "found: '%s'", word);
            else if (fprintf(f, "%d", rgbValue) <= 0)
                writeFailed(fileName);
            verbose(2, "itemRgb: %s, rgb: %#x\n", word, rgbValue);
            }
        else if (fputs(word, f) == EOF)
            writeFailed(fileName);

        if (fputc(separator, f) == EOF)
            writeFailed(fileName);
        }
    }
fclose(f);
}
void liftGl(char *destFile, struct hash *liftHash, int sourceCount, char *sources[])
/* Lift up coordinates in .gl file.
 * For each source file the contig is taken from its directory (prefixed with
 * "chr" unless it starts with ctg, NC_, NT_ or NG_) and looked up with
 * findLift.  Every line of the source must have exactly four words; words 1
 * and 2 are shifted by the lift offset and the line is written to destFile.
 * Sources without a lift spec or that cannot be opened are skipped. */
{
char dirBuf[256], chromName[256];
int i;
char *source;
char *contig;
FILE *dest = mustOpen(destFile, "w");
struct lineFile *lf = NULL;
int lineSize, wordCount;
char *line, *words[32];
struct liftSpec *spec;
int offset;

if (how == carryMissing)
    warn("'carry' doesn't work for .gl files, ignoring");
for (i=0; i<sourceCount; ++i)
    {
    source = sources[i];
    verbose(1, "Processing %s\n", source);
    contig = contigInDir(source, dirBuf);
    verbose(2,"#\tcontig: %s, source: %s, dirBuf: %s\n", contig, source, dirBuf);
    if (!startsWith("ctg", contig) &&
        !startsWith("NC_", contig) &&
        !startsWith("NT_", contig) &&
        !startsWith("NG_", contig))
        {
        /* Bounded so a long contig name cannot overrun chromName. */
        snprintf(chromName, sizeof(chromName), "chr%s", contig);
        contig = chromName;
        verbose(2,"#\tcontig: %s, chromName: %s\n", contig, chromName);
        }
    spec = findLift(liftHash, contig, lf);
    if (spec == NULL)
        continue;
    cantHandleSpecRevStrand(spec);
    offset = spec->offset;
    lf = lineFileMayOpen(source, TRUE);
    if (lf == NULL)
        {
        warn("%s doesn't exist, skipping", source);
        continue;
        }
    while (lineFileNext(lf, &line, &lineSize))
        {
        int s, e;
        if ((wordCount = chopLine(line, words)) != 4)
            errAbort("Bad line %d of %s", lf->lineIx, lf->fileName);
        s = atoi(words[1]);
        e = atoi(words[2]);
        fprintf(dest, "%s\t%d\t%d\t%s\n", words[0], s+offset, e+offset, words[3]);
        }
    lineFileClose(&lf);
    if (dots)
        verbose(1, "\n");
    }
/* The output file was previously never closed. */
fclose(dest);
}
Ejemplo n.º 26
0
void edwFixRevoked(char *database, char *inFile)
/* edwFixRevoked - Mark as deprecated files that are revoked in ENCODE2. */
/* inFile is in format:
 *    metaVariable objStatus revoked [- reason]
 *    metaObject name
 * A metaVariable line sets the reason used for the metaObject lines that
 * follow it (a default is used when none is given).  For each metaObject the
 * matching edwFile rows are looked up and an SQL update statement marking
 * them deprecated is printed to stdout; nothing is updated directly here. */
{
struct sqlConnection *conn = edwConnect();
struct lineFile *lf = lineFileOpen(inFile, TRUE);
char *line;
char *defaultReason = "Revoked in ENCODE2";
char *reason = defaultReason;
while (lineFileNextReal(lf, &line))
    {
    if (startsWithWord("metaVariable", line))
        {
        char *pattern = "metaVariable objStatus revoked";
        if (startsWithWord(pattern, line))
            {
            reason = skipLeadingSpaces(line + strlen(pattern));
            if (isEmpty(reason))
                reason = defaultReason;
            else
                {
                if (reason[0] == '-')
                   reason = skipLeadingSpaces(reason + 1);
                /* The line buffer is reused by the next read, so keep a copy. */
                reason = cloneString(reason);
                }
            }
        else
            errAbort("??? %s\n", line);
        }
    else if (startsWithWord("metaObject", line))
        {
        char *row[3];
        int wordCount = chopLine(line, row);
        if (wordCount != 2)
            errAbort("Strange metaobject line %d of %s\n", lf->lineIx, lf->fileName);
        char *prefix = row[1];
        if (!startsWith("wgEncode", prefix))
            errAbort("Strange object line %d of %s\n", lf->lineIx, lf->fileName);
        char query[512];
        sqlSafef(query, sizeof(query),
            "select * from edwFile where submitFileName like '%s/%%/%s%%'", database, prefix);
        struct edwFile *ef, *efList = edwFileLoadByQuery(conn, query);
        printf("# %s %s\n", prefix, reason);
        for (ef = efList; ef != NULL; ef = ef->next)
            {
            long long id = ef->id;
            /* NOTE(review): reason comes straight from inFile and is not escaped
             * in the emitted SQL - confirm it can never contain a quote. */
            printf("update edwFile set deprecated='%s' where id=%lld;\n", reason, id);
            }
        }
    else
        errAbort("Unrecognized first word in %s\n", line);
    }
/* The input file was previously left open. */
lineFileClose(&lf);
}
Ejemplo n.º 27
0
void addStageInfo(char *gsDir, struct hash *cloneHash)
/* Add info about which file and what stage clone is in. */
/* TSF - This is no longer used due to unavailability of *.finf files - 4/7/2003 */
/* Reads the four .finf files under gsDir.  The second word of each line, with
 * its suffix chopped off, is looked up in cloneHash and the clone found gets
 * the stage letter paired with that file.  Clones missing from the hash are
 * warned about, up to maxWarn times. */
{
    static char *finfFiles[] = {"ffa/finished.finf", "ffa/draft.finf",
                                "ffa/predraft.finf", "ffa/extras.finf"
                               };
    /* One stage letter per entry of finfFiles, in the same order. */
    static char stages[] = "FDPD";
    struct lineFile *lf;
    char *line;
    char *words[7];
    int numStages = strlen(stages);
    int i;
    char pathName[512];
    char *finfFile, stage;
    int warnsLeft = maxWarn; /* Only show first maxWarn warnings about missing clones. */
    char cloneName[256];
    struct clonePos *clone;
    int wordCount, cloneCount;

    for (i=0; i<numStages; ++i)
    {
        finfFile = finfFiles[i];
        stage = stages[i];
        sprintf(pathName, "%s/%s", gsDir, finfFile);
        printf("Processing %s\n", pathName);
        lf = lineFileOpen(pathName, TRUE);
        cloneCount = 0;
        while (lineFileNext(lf, &line, NULL))
        {
            wordCount = chopLine(line, words);
            assert(wordCount == 7);
            strncpy(cloneName, words[1], sizeof(cloneName));
            /* strncpy does not terminate when the source fills the buffer. */
            cloneName[sizeof(cloneName)-1] = 0;
            chopSuffix(cloneName);
            if ((clone = hashFindVal(cloneHash, cloneName)) == NULL)
            {
                if (warnsLeft > 0)
                {
                    --warnsLeft;
                    warn("%s is in %s but not in ooDir/*/*.gl", cloneName, pathName);
                }
                else if (warnsLeft == 0)
                {
                    /* Drop below zero so this notice is only shown once. */
                    --warnsLeft;
                    warn("(Truncating additional warnings)");
                }
                continue;
            }
            clone->stage[0] = stage;
            cloneCount++;
        }
        lineFileClose(&lf);
        printf("Got %d clones in %s\n", cloneCount, pathName);
    }
}
Ejemplo n.º 28
0
static void agpToFa(char *agpFile, char *agpSeq, char *faOut, char *seqDir)
/* agpToFa - Convert a .agp file to a .fa file.
 * Reads agpFile line by line, keeping only lines whose first word matches
 * agpSeq (or all lines when agpSeq is "all").  Fragment lines are collected
 * into a list; each time the first word changes, and once more at the end,
 * the collected fragments are handed to agpToFaOne to be written to faOut.
 * Lines whose fifth word starts with 'N' or 'U' only move the expected
 * position forward. */
{
struct lineFile *lf = lineFileOpen(agpFile, TRUE);
char *line, *words[16];
int lineSize, wordCount;
int lastPos = 0;
struct agpFrag *agpList = NULL, *agp;
FILE *f = mustOpen(faOut, "w");
char *prevChrom = NULL;

verbose(2,"#\tprocessing AGP file: %s\n", agpFile);
while (lineFileNext(lf, &line, &lineSize))
    {
    if (line[0] == 0 || line[0] == '#' || line[0] == '\n')
        continue;
    wordCount = chopLine(line, words);
    if (wordCount < 5)
        errAbort("Bad line %d of %s: need at least 5 words, got %d\n",
                 lf->lineIx, lf->fileName, wordCount);
    if (! (sameWord("all", agpSeq) || sameWord(words[0], agpSeq)))
        continue;
    if (prevChrom != NULL && !sameString(prevChrom, words[0]))
        {
        /* First word changed: flush what was collected for the previous one. */
        agpToFaOne(&agpList, agpFile, prevChrom, seqDir, lastPos, f);
        lastPos = 0;
        }
    if (words[4][0] != 'N' && words[4][0] != 'U')
        {
        lineFileExpectAtLeast(lf, 9, wordCount);
        agp = agpFragLoad(words);
        /* file is 1-based but agpFragLoad() now assumes 0-based: */
        agp->chromStart -= 1;
        agp->fragStart  -= 1;
        if (agp->chromStart != lastPos)
            errAbort("Start doesn't match previous end line %d of %s\n",
                     lf->lineIx, lf->fileName);
        if (agp->chromEnd - agp->chromStart != agp->fragEnd - agp->fragStart)
            errAbort("Sizes don't match in %s and %s line %d of %s\n",
                     agp->chrom, agp->frag, lf->lineIx, lf->fileName);
        slAddHead(&agpList, agp);
        lastPos = agp->chromEnd;
        }
    else
        {
        lastPos = lineFileNeedNum(lf, words, 2);
        }
    if (prevChrom == NULL || !sameString(prevChrom, words[0]))
        {
        freeMem(prevChrom);
        prevChrom = cloneString(words[0]);
        }
    }
agpToFaOne(&agpList, agpFile, prevChrom, seqDir, lastPos, f);
/* Release what was previously left open or allocated. */
freeMem(prevChrom);
fclose(f);
lineFileClose(&lf);
}
Ejemplo n.º 29
0
static void parseDatabaseLines(struct blastFile *bf, char *line, struct blastQuery *bq)
/* Handle the database section of a query, something like:
 * Database: chr22.fa
 *        977 sequences; 95,550,797 total letters
 * The database name is the second word of line plus any following lines that
 * do not start with white space (some blastp versions split a long path over
 * several lines).  The first line that does start with white space supplies
 * the sequence count (first word) and letter count (third word).  Results are
 * stored in bq. */
{
static struct dyString *nameBuf = NULL;
char *fields[16];
int fieldCount;

if (bq->database != NULL)
    bfError(bf, "already parse Database:");
if (nameBuf == NULL)
    nameBuf = dyStringNew(512);

/* Start the name with the word after "Database:". */
fieldCount = chopLine(line, fields);
if (fieldCount < 2)
    bfError(bf, "Expecting database name");
dyStringClear(nameBuf);
dyStringAppend(nameBuf, fields[1]);

/* Glue on continuation lines until the indented counts line shows up. */
for (;;)
    {
    line = bfNeedNextLine(bf);
    if (isspace(line[0]))
        break;
    dyStringAppend(nameBuf, line);
    }
bq->database = cloneString(nameBuf->string);

/* line now holds something like:
 *        977 sequences; 95,550,797 total letters
 */
fieldCount = chopLine(line, fields);
if (fieldCount < 3 || !isdigit(fields[0][0]) || !isdigit(fields[2][0]))
    bfError(bf, "Expecting database info");
decomma(fields[0]);
decomma(fields[2]);
bq->dbSeqCount = atoi(fields[0]);
bq->dbBaseCount = atoi(fields[2]);
}
Ejemplo n.º 30
0
void viewWaba(char *wabName)
/* Print the alignments of a waba file in human readable form.
 * Each record is four lines: an info line of exactly 10 words followed by
 * three symbol lines.  The info line is echoed; its word 6 and word 8 are
 * split on ':' and '-' into three parts giving the query and target ranges,
 * and word 7 starts with the strand.  The symbol lines are then shown with
 * xenShowAli, starting from the query end instead of the query start when the
 * strand is not '+'. */
{
struct lineFile *lf = lineFileOpen(wabName, TRUE);
char *line;
int lineSize;

while (lineFileNext(lf, &line, &lineSize))
    {
    char *words[16], *parts[4];
    char *syms[3];
    int qStart, qEnd, tStart, tEnd;
    int symCount = 0;
    int ix;
    char strand;

    printf("%s\n", line);
    if (chopLine(line, words) != 10)
        errAbort("Funny info line %d of %s\n", lf->lineIx, lf->fileName);
    if (chopString(words[6], ":-", parts, ArraySize(parts)) != 3)
        errAbort("Bad query range line %d of %s\n", lf->lineIx, lf->fileName);
    qStart = atoi(parts[1]);
    qEnd = atoi(parts[2]);
    strand = words[7][0];
    if (chopString(words[8], ":-", parts, ArraySize(parts)) != 3)
        errAbort("Bad target range line %d of %s\n", lf->lineIx, lf->fileName);
    tStart = atoi(parts[1]);
    tEnd = atoi(parts[2]);

    /* Pull in the three symbol lines; the first one sets the symbol count. */
    for (ix = 0; ix < 3; ++ix)
        {
        if (!lineFileNext(lf, &line, &lineSize))
            errAbort("Unexpected EOF.");
        if (ix == 0)
            symCount = strlen(line);
        syms[ix] = cloneString(line);
        }

    if (strand == '+')
        xenShowAli(syms[0], syms[1], syms[2], symCount, stdout, qStart, tStart, '+', '+', 60);
    else
        xenShowAli(syms[0], syms[1], syms[2], symCount, stdout, qEnd, tStart, '-', '+', 60);
    for (ix = 2; ix >= 0; --ix)
        freeMem(syms[ix]);
    }
lineFileClose(&lf);
}