Beispiel #1
0
struct peakSource *peakSourceLoadAll(char *fileName, int dimCount)
/* Parse fileName one row at a time and return the peakSources in file order.
 * Every row must have six fixed columns (data source name, the chrom, start,
 * end and score column indexes, and a normalization factor) followed by
 * dimCount dimension labels. */
{
enum { fixedFieldCount = 6 };
struct lineFile *lf = lineFileOpen(fileName, TRUE);
int colCount = dimCount + fixedFieldCount;
char *cols[colCount];
struct peakSource *list = NULL;

while (lineFileNextRow(lf, cols, colCount))
    {
    struct peakSource *ps;
    char **labelCols = cols + fixedFieldCount;
    int labelIx;
    int highestIx;

    /* Fixed leading fields. */
    AllocVar(ps);
    ps->dataSource = cloneString(cols[0]);
    ps->chromColIx = sqlUnsigned(cols[1]);
    ps->startColIx = sqlUnsigned(cols[2]);
    ps->endColIx = sqlUnsigned(cols[3]);
    ps->scoreColIx = sqlUnsigned(cols[4]);
    ps->normFactor = sqlDouble(cols[5]);

    /* The remaining columns are the per-dimension labels. */
    AllocArray(ps->labels, dimCount);
    for (labelIx = 0; labelIx < dimCount; ++labelIx)
        ps->labels[labelIx] = cloneString(labelCols[labelIx]);

    /* A data row needs one more column than the largest index referenced. */
    highestIx = max(ps->chromColIx, ps->startColIx);
    highestIx = max(highestIx, ps->endColIx);
    highestIx = max(highestIx, ps->scoreColIx);
    ps->minColCount = highestIx + 1;

    slAddHead(&list, ps);
    }
lineFileClose(&lf);

/* Rows were pushed on the head, so flip the list back into file order. */
slReverse(&list);
return list;
}
struct sangRead *readReads(char *fileName, struct hash *pairHash)
/* Read in read database file and hook it up to pairs in pairHash.
 * Each four-column row is loaded with sangReadLoad; its id must already be
 * a key in pairHash (hashMustFindVal is used for the lookup).  A read whose
 * pq field starts with 'p' becomes the pair's fRead, any other becomes its
 * rRead; a warning is issued when that slot was already filled.
 * Returns the reads in file order. */
{
struct sangRead *list = NULL, *el;
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *words[4];
struct sangPair *pair;

printf("Reading %s\n", fileName);
while (lineFileNextRow(lf, words, 4))
    {
    el = sangReadLoad(words);
    slAddHead(&list, el);
    pair = hashMustFindVal(pairHash, el->id);
    if (el->pq[0] == 'p')
        {
        if (pair->fRead)
            warn("%s - duplicate p read line %d of %s\n", el->id, lf->lineIx, lf->fileName);
        pair->fRead = el;
        }
    else
        {
        if (pair->rRead)
            warn("%s - duplicate q read line %d of %s\n", el->id, lf->lineIx, lf->fileName);
        pair->rRead = el;
        }
    }
lineFileClose(&lf);
/* slAddHead built the list backwards; restore file order. */
slReverse(&list);
return list;
}
Beispiel #3
0
void readGbAcc(struct lineFile *gaf)
/* Read one accession per row from gaf and flag each as having a sequence
 * (gbSeq = TRUE) in gbAccHash.  An accession not yet in gbAccHash gets a new
 * gb record; if it is also a key in nameHash, it is added to that entry's
 * genbank list and removed from its otherNames list. */
{
  char *row[1];

  while (lineFileNextRow(gaf, row, 1))
    {
      char *accession = row[0];
      struct gb *entry;
      struct sts *marker;

      if (hashLookup(gbAccHash, accession))
        {
          /* Already recorded: only the sequence flag needs setting. */
          entry = hashMustFindVal(gbAccHash, accession);
          entry->gbSeq = TRUE;
          continue;
        }

      /* First sighting of this accession: create and register its record. */
      AllocVar(entry);
      entry->next = NULL;
      entry->acc = cloneString(accession);
      entry->s = NULL;
      entry->gbSeq = TRUE;
      hashAdd(gbAccHash, accession, entry);

      if (hashLookup(nameHash, accession))
        {
          /* Move the name from the "other names" list to the genbank list. */
          marker = hashMustFindVal(nameHash, accession);
          addElement(accession, &marker->si->genbank, &marker->si->gbCount);
          removeElement(accession, &marker->si->otherNames, &marker->si->nameCount);
        }
    }
}
Beispiel #4
0
static void processPslFile(struct sqlConnection *conn, struct gbSelect* select,
                           struct gbStatusTbl* statusTbl, char* pslPath)
/* Walk the PSL file at pslPath (opened with gzLineFileOpen), loading each
 * PSL_NUM_COLS-column row into a psl, handing it to processPsl together
 * with the open lineFile, and freeing it afterwards. */
{
struct lineFile *lf = gzLineFileOpen(pslPath);
char *fields[PSL_NUM_COLS];

for (;;)
    {
    struct psl *rec;
    if (!lineFileNextRow(lf, fields, PSL_NUM_COLS))
        break;
    rec = pslLoad(fields);
    processPsl(conn, select, statusTbl, rec, lf);
    pslFree(&rec);
    }
gzLineFileClose(&lf);
}
Beispiel #5
0
static void processOIFile(struct sqlConnection *conn, struct gbSelect* select,
                          struct gbStatusTbl* statusTbl, char* oiPath)
/* Walk the orientation-info file at oiPath (opened with gzLineFileOpen),
 * loading each EST_ORIENT_INFO_NUM_COLS-column row into an estOrientInfo,
 * handing it to processOI together with the open lineFile, and freeing it
 * afterwards. */
{
struct lineFile *lf = gzLineFileOpen(oiPath);
char *fields[EST_ORIENT_INFO_NUM_COLS];

for (;;)
    {
    struct estOrientInfo *rec;
    if (!lineFileNextRow(lf, fields, EST_ORIENT_INFO_NUM_COLS))
        break;
    rec = estOrientInfoLoad(fields);
    processOI(conn, select, statusTbl, rec, lf);
    estOrientInfoFree(&rec);
    }
gzLineFileClose(&lf);
}
Beispiel #6
0
void initKillList()
/* Build killHash from the file named by the "killList" option: the single
 * column of every row becomes a key with integer value 1.  The option is
 * required (checked with assert). */
{
char *killFile = optionVal("killList", NULL);
char *row[1];
struct lineFile *lf;

assert(killFile);
killHash = newHash(10);
lf = lineFileOpen(killFile, TRUE);
while (lineFileNextRow(lf, row, ArraySize(row)))
    hashAddInt(killHash, row[0], 1);
lineFileClose(&lf);
}
Beispiel #7
0
void readTissueLibraryIntoCache(char *fileName)
/* Fill tissLibHash from the three-column file fileName.  The first column
 * is the hash key; the value is a two-element slInt list holding the
 * second column (library) followed by the third column (tissue). */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *row[3];

tissLibHash = newHash(12);
while (lineFileNextRow(lf, row, ArraySize(row)))
    {
    struct slInt *libNode = newSlInt(sqlSigned(row[1]));
    struct slInt *tissueNode = newSlInt(sqlSigned(row[2]));

    /* Chain tissue behind library so one hash value carries both. */
    slAddTail(&libNode, tissueNode);
    hashAdd(tissLibHash, row[0], libNode);
    }
lineFileClose(&lf);
}
Beispiel #8
0
void protDat(char *protName, char *blatName, char *aliasFile, char *outName)
/* Write to outName every psl from protName whose qName is also the qName of
 * a psl in blatName and a key in the alias file.  The qName written is
 *     qName.tName:tStart-tEnd.kgName[.spName]
 * where tName/tStart/tEnd come from the matching blat psl and kgName/spName
 * from the alias row.  The alias file has two columns (key, kgName) when
 * the "fb" option is set and three (key, kgName, spName) otherwise.
 * Aborts (via safef) if a composed name does not fit the name buffer. */
{
FILE *outFile = mustOpen(outName, "w");
struct hash *blatHash = newHash(10);
struct hash *aliasHash = newHash(10);
struct psl *psls, *pslPtr, *protPsls, *blatPsl;
struct lineFile *lf = lineFileOpen(aliasFile, TRUE);
struct alias *alPtr;
char buffer[1024];
char *words[3];
int numWords = optionExists("fb") ? 2 : 3;

while (lineFileNextRow(lf, words, numWords))
    {
    AllocVar(alPtr);
    alPtr->kgName = cloneString(words[1]);
    if (numWords == 3)
        alPtr->spName = cloneString(words[2]);
    hashAdd(aliasHash, cloneString(words[0]), alPtr);
    }
lineFileClose(&lf);

protPsls = pslLoadAll(protName);

pslPtr = psls = pslLoadAll(blatName);
for(; pslPtr; pslPtr = pslPtr->next)
    hashAdd(blatHash, pslPtr->qName, pslPtr);

for(pslPtr = protPsls; pslPtr; pslPtr = pslPtr->next)
    {
    if ((blatPsl = hashFindVal(blatHash, pslPtr->qName)) != NULL)
        {
        if ((alPtr = hashFindVal(aliasHash, pslPtr->qName)) != NULL)
            {
            char *origName = pslPtr->qName;
            if (numWords == 3)
                safef(buffer, sizeof(buffer), "%s.%s:%d-%d.%s.%s", pslPtr->qName, blatPsl->tName,
                    blatPsl->tStart, blatPsl->tEnd, alPtr->kgName, alPtr->spName);
            else
                safef(buffer, sizeof(buffer), "%s.%s:%d-%d.%s", pslPtr->qName, blatPsl->tName,
                    blatPsl->tStart, blatPsl->tEnd, alPtr->kgName);
            /* Borrow the stack buffer only for the duration of the output
             * call, so the psl never keeps a pointer into this frame. */
            pslPtr->qName = buffer;
            pslTabOut(pslPtr, outFile);
            pslPtr->qName = origName;
            }
        }
    }
carefulClose(&outFile);
}
Beispiel #9
0
static struct hash *readLift(char *liftAcross)
/* read in liftAcross file, create hash of srcName as hash key,
 *	hash elements are simple lists of coordinate relationships
 *	return them all sorted by start position
 *
 * Each row has six columns: srcName, src start, src end, dstName,
 * dst start and strand.  The strand is stored as '-' only when the
 * sixth column starts with '-'; anything else is treated as '+'.
 */
{
char *row[6];
struct hash *result = newHash(8);
struct hashEl *hel = NULL;
struct lineFile *lf = lineFileOpen(liftAcross, TRUE);
while (lineFileNextRow(lf, row, ArraySize(row)))
    {
    struct liftSpec *liftSpec;
    hel = hashStore(result, row[0]);		/* srcName hash	*/
    AllocVar(liftSpec);
    liftSpec->start = sqlUnsigned(row[1]);	/* src start	*/
    liftSpec->end = sqlUnsigned(row[2]);	/* src end	*/
    liftSpec->dstName = cloneString(row[3]);	/* dstName	*/
    liftSpec->dstStart = sqlUnsigned(row[4]);	/* dst start	*/
    liftSpec->strand = '+';			/* dst strand	*/
    if ('-' == *row[5])
	liftSpec->strand = '-';
    /* accumulate list of lift specs under the srcName hash	*/
    slAddHead(&(hel->val), liftSpec);
    }
/* Everything needed has been copied out of the rows; release the file. */
lineFileClose(&lf);

/*	Go through each srcName in the hash, and sort the list there by
 *	the start coordinate of each item.  The searching will expect
 *	them to be in order.
 */
struct hashCookie cookie = hashFirst(result);
while ((hel = hashNext(&cookie)) != NULL)
    {
    slSort(&(hel->val), lsStartCmp);
    if (verboseLevel() > 2)
	{
	struct liftSpec *ls;
	for (ls = hel->val; ls != NULL; ls = ls->next)
	    verbose(3, "# %s\t%d\t%d\t%s\t%d\t%c\n", hel->name, ls->start,
		ls->end, ls->dstName, ls->dstStart, ls->strand);
	}
    }

return result;
}
struct sangRange *readRanges(char *fileName, struct hash *hash)
/* Read range file into list/hash.
 * Each three-column row is loaded with sangRangeLoad and added to hash
 * under its name via hashAddUnique.  Returns the ranges in file order. */
{
struct sangRange *list = NULL, *el;
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *words[3];

printf("Reading %s\n", fileName);
while (lineFileNextRow(lf, words, 3))
    {
    el = sangRangeLoad(words);
    slAddHead(&list, el);
    hashAddUnique(hash, el->name, el);
    }
lineFileClose(&lf);
/* slAddHead built the list backwards; restore file order. */
slReverse(&list);
return list;
}
Beispiel #11
0
struct hash *readBed(char *fileName)
/* Load a bed file into a hash keyed by chromosome name whose values are
 * binKeepers holding the items.  Three columns are required per row, or
 * five when bScore is set (the score is taken from the fifth).  Rows with
 * start > end abort; rows with start == end abort unless
 * allowStartEqualEnd is set, in which case they are stored with the bin
 * range widened by one base on each side. */
{
char *cols[5];
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct hash *chromHash = newHash(0);
int colCount = bScore ? 5 : 3;

while (lineFileNextRow(lf, cols, colCount))
    {
    struct bed5 *item;
    struct binKeeper *keeper;
    struct hashEl *chromEl = hashLookup(chromHash, cols[0]);

    /* First item on this chromosome: give it a binKeeper of its own. */
    if (chromEl == NULL)
        {
        keeper = binKeeperNew(0, 1024*1024*1024);
        chromEl = hashAdd(chromHash, cols[0], keeper);
        }
    keeper = chromEl->val;

    AllocVar(item);
    item->chrom = chromEl->name;    /* share the hash's copy of the name */
    item->start = lineFileNeedNum(lf, cols, 1);
    item->end = lineFileNeedNum(lf, cols, 2);
    if (bScore)
        item->score = lineFileNeedNum(lf, cols, 4);

    if (item->start > item->end)
        errAbort("start after end line %d of %s", lf->lineIx, lf->fileName);

    if (item->start != item->end)
        binKeeperAdd(keeper, item->start, item->end, item);
    else if (allowStartEqualEnd)
        /* Only the binKeeper range is widened; the item keeps its own
         * start and end. */
        binKeeperAdd(keeper, max(0, item->start-1), item->end+1, item);
    else
        lineFileAbort(lf, "start==end (if this is legit, use -allowStartEqualEnd)");
    }
lineFileClose(&lf);
return chromHash;
}
Beispiel #12
0
void bedFileStats(char *bedFile, int colCount, FILE *f)
/* Gather the size (end column minus start column) and score of every row
 * of bedFile, then write one line to f: the file name, the row count, and
 * the printStats output for sizes and for scores.  Columns are selected by
 * the startColIx, endColIx and scoreColIx settings. */
{
struct lineFile *lf = lineFileOpen(bedFile, TRUE);
struct slDouble *sizes = NULL, *scores = NULL;
char *cols[colCount];

while (lineFileNextRow(lf, cols, colCount))
    {
    struct slDouble *node;
    int span = sqlUnsigned(cols[endColIx]) - sqlUnsigned(cols[startColIx]);
    node = slDoubleNew(span);
    slAddHead(&sizes, node);

    double rowScore = sqlDouble(cols[scoreColIx]);
    node = slDoubleNew(rowScore);
    slAddHead(&scores, node);
    }

fprintf(f, "%s\t%d\tsize:", bedFile, slCount(scores));
printStats(f, sizes);
fprintf(f, "\tscore:");
printStats(f, scores);
fprintf(f, "\n");
lineFileClose(&lf);
}
static double minOfCol(char *fileName, int colIx)
/* Scan column colIx (zero-based) of every row in fileName and return the
 * smallest value found.  Aborts if the file has no rows. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
int colsNeeded = colIx+1;
char *cols[colsNeeded];
boolean seenAny = FALSE;
double lowest = 0;

while (lineFileNextRow(lf, cols, colsNeeded))
    {
    double cur = lineFileNeedDouble(lf, cols, colIx);
    /* Skip values that do not improve on the current minimum. */
    if (seenAny && !(cur < lowest))
        continue;
    seenAny = TRUE;
    lowest = cur;
    }
lineFileClose(&lf);

if (!seenAny)
    errAbort("No data in %s", fileName);
return lowest;
}
Beispiel #14
0
struct sage *loadSageTags(char *fileName, int numExps)
{
    struct sage *sgList=NULL, *sg=NULL;
    char *words[3];
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    while(lineFileNextRow(lf, words,3)) 
	{
	    if(sg == NULL || sg->uni != atoi(words[0]))
		{
		    if(sg != NULL) 
			slSafeAddHead(&sgList,sg);
		    sg = createNewSage(numExps);
		    sg->uni = atoi(words[0]);
		    snprintf(sg->gb, sizeof(sg->gb), "unknown");
		    snprintf(sg->gi, sizeof(sg->gb), "unknown");
		    sg->description = cloneString(words[1]);
		    sg->numTags =1;
		    assert(strlen(words[2]) <= 10);
		    sg->tags = needMem(sizeof(char*) * 1);
		    sg->tags[0] = needMem(sizeof(char) * 11);
		    strcpy(sg->tags[0],words[2]);
		}
	    else 
		{
		    sg->tags = needMoreMem(sg->tags, (sg->numTags*sizeof(char*)), ((sg->numTags+1)*sizeof(char*)));
		    sg->tags[sg->numTags] = needMem(sizeof(char) * 11);
		    strcpy(sg->tags[sg->numTags],words[2]);
		    sg->numTags++;
		}
	}
    return(sgList);
    /*for(sg=sgList; sg != NULL; sg = sg->next)
      {
      sageTabOut(sg,stdout);
      }*/
}
struct sangPair *readPairs(char *fileName, struct hash *pairHash, struct hash *rangeHash)
/* Read in pair file and connect pairs to relevant range.
 * Each two-column row is parsed with sangInsertStaticLoad.  A new sangPair
 * is added to pairHash under the row's id (hashAddUnique) and linked to the
 * range found in rangeHash under the row's name (hashMustFindVal).
 * Returns the pairs in file order. */
{
struct sangPair *list = NULL, *el;
struct hashEl *hel;
struct sangInsert si;
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *words[2];

printf("Reading %s\n", fileName);
while (lineFileNextRow(lf, words, 2))
    {
    sangInsertStaticLoad(words, &si);
    AllocVar(el);
    hel = hashAddUnique(pairHash, si.id, el);
    /* si is overwritten on the next row, so use the name string stored
     * in the hash element instead. */
    el->name = hel->name;
    el->range = hashMustFindVal(rangeHash, si.name);
    slAddHead(&list, el);
    }
slReverse(&list);
lineFileClose(&lf);
return list;
}
Beispiel #16
0
int *readInConservationVals(char *fileName)
/* Open up the file and read in the conservation scores.
   return an array indexed by base position with the conservation
   scores. Free with freez() */
{
struct lineFile *lf = NULL;
int *scores = NULL;
int chromSize = optionInt("chromSize", 0);
int i = 0;
char *words[2];
if(chromSize == 0)
    errAbort("Can't have chromSize set to 0.");
warn("Reading in conservation");
setMaxAlloc(sizeof(*scores)*chromSize+1);
AllocArray(scores, chromSize);

/* Make empty data be -1, a not possible score. */
for(i = 0; i < chromSize; i++)
    scores[i] = -1;

/* Open up our conservation file. */
if(sameString(fileName, "stdin"))
    lf = lineFileStdin(TRUE);
else
    lf = lineFileOpen(fileName, TRUE);

dotForUserInit( chromSize/10 > 1 ? chromSize/10 : 1);
while(lineFileNextRow(lf, words, ArraySize(words)))
    {
    scores[atoi(words[0])] = round(atof(words[1]) * FLOAT_CHEAT);
    dotForUser();
    }
lineFileClose(&lf);
warn("Done");
return scores;
}
void ctgToChromFa(char *chromName, char *insertFile, char *chromDir, 
	char *orderLst, char *outName, struct hash *liftHash)
/* ctgToChromFa - convert contig level fa files to chromosome level.
 * Writes a single fa record named chromName to outName.  Contigs are taken
 * in the order listed in orderLst (one name per row); each is preceded by a
 * run of N's whose length comes from the contig's entry in liftHash when
 * liftHash is given, and otherwise from the inserts read from insertFile.
 * The contig sequence itself is read from chromDir/contig/contig.fa; a
 * missing file is a warning, and aborts unless the "missOk" variable exists.
 * With a liftHash the output is padded with N's up to the lift file's
 * chromosome size, and it is an error for the output to be longer. */
{
struct hash *uniq = newHash(0);
struct bigInsert *bi;
struct chromInserts *chromInserts;
struct hash *insertHash = newHash(9);
struct lineFile *lf = lineFileOpen(orderLst, TRUE);
FILE *f = mustOpen(outName, "w");
char ctgFaName[512];
char *words[2];
int liftChromSize = 0;
int actualChromSize = 0;
boolean isFirst = TRUE;

chromInsertsRead(insertFile, insertHash);
chromInserts = hashFindVal(insertHash, chromName);
fprintf(f, ">%s\n", chromName);
while (lineFileNextRow(lf, words, 1))
    {
    char *contig = words[0];
    int nSize;
    
    if (liftHash != NULL)
        {
	struct lift *lift = hashMustFindVal(liftHash, contig);
	nSize = lift->nBefore;
	liftChromSize = lift->chromSize;
	}
    else
        nSize = chromInsertsGapSize(chromInserts, rmChromPrefix(contig), isFirst);
    /* hashAddUnique rejects a contig that is listed twice. */
    hashAddUnique(uniq, contig, NULL);
    addN(f, nSize);
    actualChromSize += nSize;
    isFirst = FALSE;
    /* Bounded formatting: directory and contig names come from the caller
     * and the input file, so they may not fit the fixed-size buffer. */
    safef(ctgFaName, sizeof(ctgFaName), "%s/%s/%s.fa", chromDir, contig, contig);
    if (fileExists(ctgFaName))
        {
	actualChromSize += addFa(f, ctgFaName);
	}
    else
        {
	warn("%s does not exist\n", ctgFaName);
	if (!cgiVarExists("missOk"))
	    noWarnAbort();
	}
    }
lineFileClose(&lf);
/* Trailing gap after the last contig, if the inserts define one. */
if (chromInserts != NULL)
    if  ((bi = chromInserts->terminal) != NULL)
        {
	addN(f, bi->size);
	actualChromSize += bi->size;
	}
if (liftHash != NULL)
    {
    if (actualChromSize > liftChromSize)
	errAbort("Error: chromosome size from lift file is %d, but actual fa size is %d.  Possible inconsistency between lift and inserts?",
		 liftChromSize, actualChromSize);
    else if (actualChromSize < liftChromSize)
	addN(f, (liftChromSize - actualChromSize));
    }
/* Finish a partially written sequence line. */
if (linePos != 0)
   fputc('\n', f);
fclose(f);
}
Beispiel #18
0
void checkExp(char *bedFileName, char *tNibDir, char *nibList)
/* For every pseudoGeneLink row in bedFileName, report how the mRNAs that
 * overlap the retro locus compare with the retro copy and its parent gene.
 *
 * Per row:
 *   - log (verbose 2) the mRNAs in mrnaHash overlapping the retro locus and
 *     the parent locus;
 *   - take the best-scoring chain in chainHash on the retro chromosome whose
 *     query side overlaps the parent locus, convert it to axt using
 *     sequence loaded from nib files under tNibDir, and collect the
 *     mismatching positions with addMisMatch;
 *   - write the overlapping mRNA psls and their sequences (from twoBitFile)
 *     to files under /tmp, run "pslPretty -long -axt" with nibList on them,
 *     and read the resulting axt alignments back in;
 *   - for each overlapping mRNA, walk its target range and at every
 *     recorded mismatch position count whether the mRNA base agrees with
 *     the retro base, the parent base, or neither; one line per mRNA is
 *     written to outFile.
 *
 * Aborts if a sequence is missing from twoBitFile, if a temporary file
 * cannot be opened, or if pslPretty exits with a non-zero status. */
{
struct lineFile *bf = lineFileOpen(bedFileName , TRUE), *af = NULL;
char *row[PSEUDOGENELINK_NUM_COLS] ;
struct pseudoGeneLink *ps;
char *tmpName[512], cmd[512];
struct axt *axtList = NULL, *axt, *mAxt = NULL;
struct dnaSeq *qSeq = NULL, *tSeq = NULL, *seqList = NULL;
struct nibInfo *qNib = NULL, *tNib = NULL;
FILE *op;
int ret;

if (nibHash == NULL)
    nibHash = hashNew(0);
while (lineFileNextRow(bf, row, ArraySize(row)))
    {
    struct misMatch *misMatchList = NULL;
    struct binKeeper *bk = NULL;
    struct binElement *el, *elist = NULL;
    struct psl *mPsl = NULL, *rPsl = NULL, *pPsl = NULL, *psl ;
    struct misMatch *mf = NULL;
    ps = pseudoGeneLinkLoad(row);
    tmpName[0] = cloneString(ps->name);
    /* chopByChar takes the number of array elements, not the byte size. */
    chopByChar(tmpName[0], '.', tmpName, ArraySize(tmpName));
    verbose(2,"name %s %s:%d-%d\n",
            ps->name, ps->chrom, ps->chromStart,ps->chromEnd);
    /* get expressed retro from hash */
    bk = hashFindVal(mrnaHash, ps->chrom);
    elist = binKeeperFindSorted(bk, ps->chromStart, ps->chromEnd ) ;
    for (el = elist; el != NULL ; el = el->next)
        {
        rPsl = el->val;
        verbose(2,"retroGene %s %s:%d-%d\n",rPsl->qName, ps->chrom, ps->chromStart,ps->chromEnd);
        }
    /* find mrnas that overlap parent gene */
    bk = hashFindVal(mrnaHash, ps->gChrom);
    elist = binKeeperFindSorted(bk, ps->gStart , ps->gEnd ) ;
    for (el = elist; el != NULL ; el = el->next)
        {
        pPsl = el->val;
        verbose(2,"parent %s %s:%d %d,%d\n",
                pPsl->qName, pPsl->tName,pPsl->tStart,
                pPsl->match, pPsl->misMatch);
        }
    /* find self chain */
    bk = hashFindVal(chainHash, ps->chrom);
    elist = binKeeperFind(bk, ps->chromStart , ps->chromEnd ) ;
    slSort(&elist, chainCmpScoreDesc);
    for (el = elist; el != NULL ; el = el->next)
        {
        struct chain *chain = el->val, *subChain, *retChainToFree, *retChainToFree2;
        int qs = chain->qStart;
        int qe = chain->qEnd;
        int id = chain->id;
        /* Query coordinates of a '-' strand chain are flipped before the
         * overlap test against the parent locus. */
        if (chain->qStrand == '-')
            {
            qs = chain->qSize - chain->qEnd;
            qe = chain->qSize - chain->qStart;
            }
        if (!sameString(chain->qName , ps->gChrom) || 
                !positiveRangeIntersection(qs, qe, ps->gStart, ps->gEnd))
            {
            verbose(2," wrong chain %s:%d-%d %s:%d-%d parent %s:%d-%d\n", 
                chain->qName, qs, qe, 
                chain->tName,chain->tStart,chain->tEnd,
                ps->gChrom,ps->gStart,ps->gEnd);
            continue;
            }
        verbose(2,"chain id %d %4.0f",chain->id, chain->score);
        /* Trim the chain to the retro locus (less 7 bases at each end) on
         * the target side, then to the parent locus on the query side. */
        chainSubsetOnT(chain, ps->chromStart+7, ps->chromEnd-7, 
            &subChain,  &retChainToFree);
        if (subChain != NULL)
            chain = subChain;
        chainSubsetOnQ(chain, ps->gStart, ps->gEnd, 
            &subChain,  &retChainToFree2);
        if (subChain != NULL)
            chain = subChain;
        if (chain->qStrand == '-')
            {
            qs = chain->qSize - chain->qEnd;
            qe = chain->qSize - chain->qStart;
            }
        verbose(2," %s:%d-%d %s:%d-%d ", 
                chain->qName, qs, qe, 
                chain->tName,chain->tStart,chain->tEnd);
        if (subChain != NULL)
            verbose(2,"subChain %s:%d-%d %s:%d-%d\n",
                    subChain->qName, subChain->qStart, subChain->qEnd, 
                    subChain->tName,subChain->tStart,subChain->tEnd);

        qNib = nibInfoFromCache(nibHash, tNibDir, chain->qName);
        tNib = nibInfoFromCache(nibHash, tNibDir, chain->tName);
        tSeq = nibInfoLoadStrand(tNib, chain->tStart, chain->tEnd, '+');
        qSeq = nibInfoLoadStrand(qNib, chain->qStart, chain->qEnd, chain->qStrand);
        axtList = chainToAxt(chain, qSeq, chain->qStart, tSeq, chain->tStart,
            maxGap, BIGNUM);
        verbose(2,"axt count %d misMatch cnt %d\n",slCount(axtList), slCount(misMatchList));
        for (axt = axtList; axt != NULL ; axt = axt->next)
            {
            addMisMatch(&misMatchList, axt, chain->qSize);
            }
        verbose(2,"%d in mismatch list %s id %d \n",slCount(misMatchList), chain->qName, id);
        chainFree(&retChainToFree);
        chainFree(&retChainToFree2);
        /* Only the first (highest scoring) acceptable chain is used. */
        break;
        }
    /* create axt of each expressed retroGene to parent gene */
        /* get alignment for each mrna overlapping retroGene */
    bk = hashFindVal(mrnaHash, ps->chrom);
    elist = binKeeperFindSorted(bk, ps->chromStart , ps->chromEnd ) ;
    {
    char queryName[512];
    char axtName[512];
    char pslName[512];
    safef(queryName, sizeof(queryName), "/tmp/query.%s.fa", ps->chrom);
    safef(axtName, sizeof(axtName), "/tmp/tmp.%s.axt", ps->chrom);
    safef(pslName, sizeof(pslName), "/tmp/tmp.%s.psl", ps->chrom);
    /* mustOpen aborts with a message instead of handing a NULL stream to
     * pslOutput when the file cannot be created. */
    op = mustOpen(pslName,"w");
    for (el = elist ; el != NULL ; el = el->next)
        {
        psl = el->val;
        pslOutput(psl, op, '\t','\n');
        qSeq = twoBitReadSeqFrag(twoBitFile, psl->qName, 0, 0);

        if (qSeq != NULL)
            slAddHead(&seqList, qSeq);
        else
            errAbort("seq %s not found \n", psl->qName);
        }
    fclose(op);
    faWriteAll(queryName, seqList);
    safef(cmd,sizeof(cmd),"pslPretty -long -axt %s %s %s %s",pslName , nibList, queryName, axtName);
    ret = system(cmd);
    if (ret != 0)
        errAbort("ret is %d %s\n",ret,cmd);
    verbose(2, "ret is %d %s\n",ret,cmd);
    af = lineFileOpen(axtName, TRUE);
    while ((axt = axtRead(af)) != NULL)
        slAddHead(&mAxt, axt);
    lineFileClose(&af);
    }
    slReverse(&mAxt);
    /* for each parent/retro pair, count bases matching retro and parent better */
    for (el = elist; el != NULL ; el = el->next)
        {
        int i, scoreRetro=0, scoreParent=0, scoreNeither=0;
        struct dyString *parentMatch = newDyString(16*1024);
        struct dyString *retroMatch = newDyString(16*1024);
        mPsl = el->val;

        if (mAxt != NULL)
            {
            verbose(2,"mrna %s %s:%d %d,%d axt %s\n",
                    mPsl->qName, mPsl->tName,mPsl->tStart,
                    mPsl->match, mPsl->misMatch, 
                    mAxt->qName);
            /* The axt list is consumed in step with the psl list, one
             * alignment per mRNA. */
            assert(sameString(mPsl->qName, mAxt->qName));
            for (i = 0 ; i< (mPsl->tEnd-mPsl->tStart) ; i++)
                {
                int j = mAxt->tStart - mPsl->tStart;
                verbose(5, "listLen = %d\n",slCount(misMatchList));
                if ((mf = matchFound(&misMatchList, (mPsl->tStart)+i)) != NULL)
                    {
                    if (toupper(mf->retroBase) == toupper(mAxt->qSym[j+i]))
                        {
                        verbose (3,"match retro[%d] %d %c == %c parent %c %d\n",
                                i,mf->retroLoc, mf->retroBase, mAxt->qSym[j+i], 
                                mf->parentBase, mf->parentLoc);
                        dyStringPrintf(retroMatch, "%d,", mf->retroLoc);
                        scoreRetro++;
                        }
                    else if (toupper(mf->parentBase) == toupper(mAxt->qSym[j+i]))
                        {
                        verbose (3,"match parent[%d] %d %c == %c retro %c %d\n",
                                i,mf->parentLoc, mf->parentBase, mAxt->qSym[j+i], 
                                mf->retroBase, mf->retroLoc);
                        dyStringPrintf(parentMatch, "%d,", mf->parentLoc);
                        scoreParent++;
                        }
                    else
                        {
                        verbose (3,"match neither[%d] %d %c != %c retro %c %d\n",
                                i,mf->parentLoc, mf->parentBase, mAxt->tSym[j+i], 
                                mf->retroBase, mf->retroLoc);
                        scoreNeither++;
                        }
                    }
                }
            verbose(2,"final score %s parent %d retro %d  neither %d\n",
                    mPsl->qName, scoreParent, scoreRetro, scoreNeither);
            fprintf(outFile,"%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%s\t%d\t%d\t%d\t%s\t%s\n",
                    ps->chrom, ps->chromStart, ps->chromEnd, ps->name, ps->score, 
                    mPsl->tName, mPsl->tStart, mPsl->tEnd, mPsl->qName, 
                    scoreParent, scoreRetro, scoreNeither, parentMatch->string, retroMatch->string);
            mAxt = mAxt->next;
            }
        dyStringFree(&parentMatch);
        dyStringFree(&retroMatch);
        }
    }
lineFileClose(&bf);
}
void ticksToWig(int startTick, char *inTable, char *outDensity, char *outAverage)
/* ticksToWig - Convert tab-delimited file of Unix time ticks, and possibly also 
 * numerical values to wig file(s).
 *
 * Rows of inTable must be sorted by tick (column tickCol); out-of-order
 * input aborts.  outDensity receives one line per tick giving the number of
 * rows with that tick, with explicit zero lines for ticks skipped between
 * two consecutive input ticks.  If outAverage is not NULL it receives, for
 * each tick present in the input, the mean of column valCol.  Positions are
 * written as tick - startTick + 1; a startTick of 0 means "use the first
 * tick in the file". */
{
struct lineFile *lf = lineFileOpen(inTable, TRUE);
FILE *densityFile = mustOpen(outDensity, "w");
printVarStepHead(densityFile);
FILE *averageFile = NULL;
if (outAverage != NULL)
    {
    averageFile = mustOpen(outAverage, "w");
    printVarStepHead(averageFile);
    }
int colsToParse = 1 + max(tickCol, valCol);
char *row[colsToParse];

time_t curTick = 0;
int sameTickCount = 0;
double tickTotal = 0;
double val = 0;
time_t tick;
while (lineFileNextRow(lf, row, colsToParse))
    {
    tick = lineFileNeedNum(lf, row, tickCol);
    if (averageFile != NULL)
       val = lineFileNeedDouble(lf, row, valCol);
    if (curTick != tick)
        {
        if (curTick > tick)
            errAbort("Input isn't sorted - %ld > %ld line %d of %s\n", 
                (long)curTick, (long)tick, lf->lineIx, lf->fileName);
        if (startTick == 0)
            startTick = tick;
        /* Flush the tick that just ended (nothing to flush on first row). */
        if (sameTickCount > 0)
            {
            /* time_t is not guaranteed to be long, so convert explicitly
             * before handing the value to %ld. */
            long curPos = (long)(curTick - startTick + 1);
            fprintf(densityFile, "%ld\t%d\n", curPos, sameTickCount);
            time_t i;
            for (i=curTick+1; i<tick; ++i)
                {
                fprintf(densityFile, "%ld\t%d\n", (long)(i - startTick + 1), 0);
                }
            if (averageFile != NULL)
                {
                fprintf(averageFile, "%ld\t%f\n", 
                        curPos, tickTotal/sameTickCount);
                tickTotal = 0;
                }
            sameTickCount = 0;
            }
        curTick = tick;
        }
    tickTotal += val;
    sameTickCount += 1;
    }
/* Flush the final tick. */
if (sameTickCount > 0)
   {
   long curPos = (long)(curTick - startTick + 1);
   fprintf(densityFile, "%ld\t%d\n", curPos, sameTickCount);
   if (averageFile != NULL)
       fprintf(averageFile, "%ld\t%f\n", 
               curPos, tickTotal/sameTickCount);
   }
lineFileClose(&lf);
carefulClose(&densityFile);
carefulClose(&averageFile);
}
Beispiel #20
0
void rcvs(char *codingTable, char *clusterTable)
/* rcvs - Compare riken noncoding vs. nonspliced.
 * Reads id1->id2 pairs from table rikenIds and the names with non-zero
 * intronOrientation from rikenOrientInfo (both in database "mgsc").
 * A cluster is noncoding when the second column of its codingTable row is
 * "NoPProt".  Every row of clusterTable (cluster, id1) is then classified
 * as coding/noncoding and spliced/nonspliced, and the four counts plus
 * their total are printed.  Aborts if an id1 in clusterTable is not in
 * rikenIds. */
{
struct hash *idHash = newHash(16); // Key id1, val id2
struct hash *nonCodingHash = newHash(16);  // Key clusterId, value 
struct hash *splicedHash = newHash(16);  // Key id2, present if spliced
struct sqlConnection *conn = sqlConnect("mgsc");
struct sqlResult *sr;
char **row;
char *words[16];
struct lineFile *lf;
int codingSpliced = 0;
int noncodingSpliced = 0;
int codingNonspliced = 0;
int noncodingNonspliced = 0;

/* Read id's into hash */
sr = sqlGetResult(conn, NOSQLINJ "select id1,id2 from rikenIds");
while ((row = sqlNextRow(sr)) != NULL)
    hashAdd(idHash, row[0], cloneString(row[1]));
sqlFreeResult(&sr);

/* Read spliced into hash */
sr = sqlGetResult(conn,
	NOSQLINJ "select name from rikenOrientInfo where intronOrientation != 0");
while ((row = sqlNextRow(sr)) != NULL)
    hashAdd(splicedHash, row[0], NULL);
sqlFreeResult(&sr);

/* The database is not needed past this point. */
sqlDisconnect(&conn);

/* Read noncoding clusters into hash */
lf = lineFileOpen(codingTable, TRUE);
while (lineFileNextRow(lf, words, 2))
    {
    if (sameString(words[1], "NoPProt"))
        hashAdd(nonCodingHash, words[0], NULL);
    }
lineFileClose(&lf);

/* Stream through cluster table counting and correlating. */
lf = lineFileOpen(clusterTable, TRUE);
while (lineFileNextRow(lf, words, 2))
    {
    char *cluster = words[0];
    char *id1 = words[1];
    char *id2 = hashMustFindVal(idHash, id1);
    if (hashLookup(nonCodingHash, cluster))
        {
	if (hashLookup(splicedHash, id2))
	    ++noncodingSpliced;
	else
	    ++noncodingNonspliced;
	}
    else
        {
	if (hashLookup(splicedHash, id2))
	    ++codingSpliced;
	else
	    ++codingNonspliced;
	}
    }
lineFileClose(&lf);
printf("noncodingNonspliced %d\n", noncodingNonspliced);
printf("noncodingSpliced %d\n", noncodingSpliced);
printf("codingNonspliced %d\n", codingNonspliced);
printf("codingSpliced %d\n", codingSpliced);
printf("total %d\n", noncodingNonspliced + noncodingSpliced + codingNonspliced + codingSpliced);
}
Beispiel #21
0
void bedItemOverlapCount(struct hash *chromHash, char *infile, char *outfile){
unsigned maxChromSize = 0;
unitSize *counts = (unitSize *)NULL;
FILE *f = mustOpen(outfile, "w");
struct hashCookie hc = hashFirst(chromHash);
struct hashEl *hel;
while( (hel = hashNext(&hc)) != NULL) {
    unsigned num = (unsigned) ptToInt(hel->val);
    maxChromSize = max(num, maxChromSize);
}
verbose(2,"#\tmaxChromSize: %u\n", maxChromSize);
if (maxChromSize < 1)
    errAbort("maxChromSize is zero ?");

/*	Allocate just once for the largest chrom and reuse this array */
counts = needHugeMem(sizeof(unitSize) * maxChromSize);

/*	Reset the array to be zero to be reused */
memset((void *)counts, 0, sizeof(unitSize)*(size_t)maxChromSize);

unsigned chromSize = 0;
char *prevChrom = (char *)NULL;
boolean outputToDo = FALSE;
struct hash *seenHash = newHash(5);

    struct lineFile *bf = lineFileOpen(infile , TRUE);
    struct bed *bed = (struct bed *)NULL;
    char *row[12];
    int numFields = doBed12 ? 12 : 3;

    while (lineFileNextRow(bf,row, numFields))
	{
	int i;
	bed = bedLoadN(row, numFields);

	verbose(3,"#\t%s\t%d\t%d\n",bed->chrom,bed->chromStart, bed->chromEnd);

	if (prevChrom && differentWord(bed->chrom,prevChrom)) // End a chr
	    {
	    verbose(2,"#\tchrom %s done, size %d\n", prevChrom, chromSize);
	    if (outputToDo)
		outputCounts(counts, prevChrom, chromSize, f);
	    outputToDo = FALSE;
	    memset((void *)counts, 0,
		sizeof(unitSize)*(size_t)maxChromSize); /* zero counts */
	    freez(&prevChrom); 
	    // prevChrom is now NULL so it will be caught by next if!
	    }
	if ((char *)NULL == prevChrom)  // begin a chr
	    {
	    if (hashLookup(seenHash, bed->chrom))
		errAbort("ERROR:input file not sorted. %s seen before on line %d\n",
		    bed->chrom, bf->lineIx);

	    hashAdd(seenHash, bed->chrom, NULL);
	    prevChrom = cloneString(bed->chrom);
	    chromSize = hashIntVal(chromHash, prevChrom);
	    verbose(2,"#\tchrom %s starting, size %d\n", prevChrom,chromSize);
	    }
	if (bed->chromEnd > chromSize)
	    {
	    // check for circular chrM
	    if (doBed12 || bed->chromStart>=chromSize 
		|| differentWord(bed->chrom,"chrM")) 
		{
		warn("ERROR: %s\t%d\t%d", bed->chrom, bed->chromStart,
		bed->chromEnd);
		errAbort("chromEnd > chromSize ?  %d > %d", 
		    bed->chromEnd,chromSize);
		}

	    for (i = bed->chromStart; i < chromSize; ++i)
		INCWOVERFLOW(counts,i);
	    for (i = 0; i < (bed->chromEnd - chromSize); ++i)
		INCWOVERFLOW(counts,i);
	    }
	else if (doBed12)
	    {
	    int *starts = bed->chromStarts;
	    int *sizes = bed->blockSizes;
	    int *endStarts = &bed->chromStarts[bed->blockCount];

	    for(; starts < endStarts; starts++, sizes++)
		{
		unsigned int end = *starts + *sizes + bed->chromStart;
		for (i = *starts + bed->chromStart; i < end; ++i)
		    INCWOVERFLOW(counts,i);
		}
	    }
	else
	    {
	    for (i = bed->chromStart; i < bed->chromEnd; ++i)
		INCWOVERFLOW(counts, i);
	    }
	outputToDo = TRUE;
	bedFree(&bed); // plug the memory leak
	}

    lineFileClose(&bf);
    // Note, next file could be on same chr!

if (outputToDo)
    outputCounts(counts, prevChrom, chromSize, f);

if (doOutBounds)
    fprintf(stderr, "min %lu max %lu\n", (unsigned long)overMin, (unsigned long)overMax);

verbose(2,"#\tchrom %s done, size %d\n", prevChrom, chromSize);
carefulClose(&f);
freeMem(counts);
freez(&prevChrom);
// hashFreeWithVals(&chromHash, freez);
freeHash(&seenHash);
}