struct peakSource *peakSourceLoadAll(char *fileName, int dimCount)
/* Parse fileName one row at a time and return the rows as a list of
 * peakSources in file order.  Every row holds six fixed columns (data
 * source, chrom/start/end/score column indexes, normalization factor)
 * followed by dimCount dimension labels. */
{
int fixedCols = 6;
int colCount = dimCount + fixedCols;
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *row[colCount];
struct peakSource *list = NULL;

while (lineFileNextRow(lf, row, colCount))
    {
    struct peakSource *ps;
    int dim;

    /* Fixed leading columns. */
    AllocVar(ps);
    ps->dataSource = cloneString(row[0]);
    ps->chromColIx = sqlUnsigned(row[1]);
    ps->startColIx = sqlUnsigned(row[2]);
    ps->endColIx = sqlUnsigned(row[3]);
    ps->scoreColIx = sqlUnsigned(row[4]);
    ps->normFactor = sqlDouble(row[5]);

    /* One label per dimension, taken from the trailing columns. */
    AllocArray(ps->labels, dimCount);
    for (dim = 0; dim < dimCount; ++dim)
        ps->labels[dim] = cloneString(row[fixedCols + dim]);

    /* A data row has to reach the highest column index referenced above. */
    int posMax = max(ps->chromColIx, ps->startColIx);
    int valMax = max(ps->endColIx, ps->scoreColIx);
    ps->minColCount = max(posMax, valMax) + 1;

    slAddHead(&list, ps);
    }
lineFileClose(&lf);

/* Rows were pushed onto the head, so flip the list back to file order. */
slReverse(&list);
return list;
}
struct sangRead *readReads(char *fileName, struct hash *pairHash) /* Read in read database file and hook it up to pairs in pairHash. */ { struct sangRead *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *words[4]; int wordCount; struct sangPair *pair; printf("Reading %s\n", fileName); while (lineFileNextRow(lf, words, 4)) { el = sangReadLoad(words); slAddHead(&list, el); pair = hashMustFindVal(pairHash, el->id); if (el->pq[0] == 'p') { if (pair->fRead) warn("%s - duplicate p read line %d of %s\n", el->id, lf->lineIx, lf->fileName); pair->fRead = el; } else { if (pair->rRead) warn("%s - duplicate q read line %d of %s\n", el->id, lf->lineIx, lf->fileName); pair->rRead = el; } } lineFileClose(&lf); slReverse(&list); return list; }
void readGbAcc(struct lineFile *gaf) /* Read in and record all genbank accessions that have sequences */ { struct gb *gb; char *acc[1]; struct sts *s; while (lineFileNextRow(gaf, acc, 1)) { if (!hashLookup(gbAccHash, acc[0])) { AllocVar(gb); gb->next = NULL; gb->acc = cloneString(acc[0]); gb->s = NULL; gb->gbSeq = TRUE; hashAdd(gbAccHash, acc[0], gb); if (hashLookup(nameHash, acc[0])) { s = hashMustFindVal(nameHash, acc[0]); addElement(acc[0], &s->si->genbank, &s->si->gbCount); removeElement(acc[0], &s->si->otherNames, &s->si->nameCount); } } else { gb = hashMustFindVal(gbAccHash, acc[0]); gb->gbSeq = TRUE; } } }
static void processPslFile(struct sqlConnection *conn, struct gbSelect* select,
                           struct gbStatusTbl* statusTbl, char* pslPath)
/* Stream through the psl file at pslPath, handing each alignment to
 * processPsl so it can pick out accessions to add to the database. */
{
struct lineFile *pslLf = gzLineFileOpen(pslPath);
char *cols[PSL_NUM_COLS];
struct psl *rec = NULL;

while (lineFileNextRow(pslLf, cols, PSL_NUM_COLS))
    {
    rec = pslLoad(cols);
    processPsl(conn, select, statusTbl, rec, pslLf);
    pslFree(&rec);   /* each record is only needed for its own call */
    }
gzLineFileClose(&pslLf);
}
static void processOIFile(struct sqlConnection *conn, struct gbSelect* select,
                          struct gbStatusTbl* statusTbl, char* oiPath)
/* Stream through the orientation-info file at oiPath, handing each
 * estOrientInfo record to processOI so it can pick out accessions to add
 * to the database. */
{
struct lineFile *oiLf = gzLineFileOpen(oiPath);
char *cols[EST_ORIENT_INFO_NUM_COLS];
struct estOrientInfo *rec = NULL;

while (lineFileNextRow(oiLf, cols, EST_ORIENT_INFO_NUM_COLS))
    {
    rec = estOrientInfoLoad(cols);
    processOI(conn, select, statusTbl, rec, oiLf);
    estOrientInfoFree(&rec);   /* each record is only needed for its own call */
    }
gzLineFileClose(&oiLf);
}
void initKillList()
/* Fill the global killHash with the accessions to avoid, read one per row
 * from the file named by the "killList" option.  The option must be set. */
{
char *killFile = optionVal("killList", NULL);
char *fields[1];
struct lineFile *lf = NULL;

assert(killFile);
killHash = newHash(10);
lf = lineFileOpen(killFile, TRUE);
while (lineFileNextRow(lf, fields, ArraySize(fields)))
    hashAddInt(killHash, fields[0], 1);
lineFileClose(&lf);
}
void readTissueLibraryIntoCache(char *fileName)
/* Load fileName (three columns per row) into the global tissLibHash.
 * The first column is the key; the value is a two element slInt list
 * holding the second column followed by the third column. */
{
char *row[3];
struct lineFile *lf = lineFileOpen(fileName, TRUE);

tissLibHash = newHash(12);
while (lineFileNextRow(lf, row, ArraySize(row)))
    {
    struct slInt *libEl = newSlInt(sqlSigned(row[1]));
    struct slInt *tissueEl = newSlInt(sqlSigned(row[2]));

    /* Chain tissue behind library so a single hash value carries both. */
    slAddTail(&libEl, tissueEl);
    hashAdd(tissLibHash, row[0], libEl);
    }
lineFileClose(&lf);
}
void protDat(char *protName, char *blatName, char *aliasFile, char *outName) { FILE *outFile = mustOpen(outName, "w"); struct hash *protHash = newHash(10); struct hash *blatHash = newHash(10); struct hash *aliasHash = newHash(10); struct psl *psls, *pslPtr, *protPsls, *blatPsl; struct lineFile *lf = lineFileOpen(aliasFile, TRUE); struct alias *alPtr; char buffer[1024]; char *words[3]; int numWords = optionExists("fb") ? 2 : 3; while (lineFileNextRow(lf, words, numWords)) { AllocVar(alPtr); alPtr->kgName = cloneString(words[1]); if (numWords == 3) alPtr->spName = cloneString(words[2]); hashAdd(aliasHash, cloneString(words[0]), alPtr); } protPsls = pslLoadAll(protName); pslPtr = psls = pslLoadAll(blatName); for(; pslPtr; pslPtr = pslPtr->next) hashAdd(blatHash, pslPtr->qName, pslPtr); for(pslPtr = protPsls; pslPtr; pslPtr = pslPtr->next) { if ((blatPsl = hashFindVal(blatHash, pslPtr->qName)) != NULL) { if ((alPtr = hashFindVal(aliasHash, pslPtr->qName)) != NULL) { if (numWords == 3) sprintf(buffer,"%s.%s:%d-%d.%s.%s",pslPtr->qName,blatPsl->tName, blatPsl->tStart, blatPsl->tEnd,alPtr->kgName, alPtr->spName); else sprintf(buffer,"%s.%s:%d-%d.%s",pslPtr->qName,blatPsl->tName, blatPsl->tStart, blatPsl->tEnd,alPtr->kgName); pslPtr->qName = buffer; pslTabOut(pslPtr, outFile); } } } }
static struct hash *readLift(char *liftAcross) /* read in liftAcross file, create hash of srcName as hash key, * hash elements are simple lists of coordinate relationships * return them all sorted by start position */ { char *row[6]; struct hash *result = newHash(8); struct hashEl *hel = NULL; struct lineFile *lf = lineFileOpen(liftAcross, TRUE); while (lineFileNextRow(lf, row, ArraySize(row))) { struct liftSpec *liftSpec; hel = hashStore(result, row[0]); /* srcName hash */ AllocVar(liftSpec); liftSpec->start = sqlUnsigned(row[1]); /* src start */ liftSpec->end = sqlUnsigned(row[2]); /* src end */ liftSpec->dstName = cloneString(row[3]); /* dstName */ liftSpec->dstStart = sqlUnsigned(row[4]); /* dst start */ liftSpec->strand = '+'; /* dst strand */ if ('-' == *row[5]) liftSpec->strand = '-'; /* accumulate list of lift specs under the srcName hash */ slAddHead(&(hel->val), liftSpec); } /* Go through each srcName in the hash, and sort the list there by * the start coordinate of each item. The searching will expect * them to be in order. */ struct hashCookie cookie = hashFirst(result); while ((hel = hashNext(&cookie)) != NULL) { slSort(&(hel->val), lsStartCmp); if (verboseLevel() > 2) { struct liftSpec *ls; for (ls = hel->val; ls != NULL; ls = ls->next) verbose(3, "# %s\t%d\t%d\t%s\t%d\t%c\n", hel->name, ls->start, ls->end, ls->dstName, ls->dstStart, ls->strand); } } return result; }
struct sangRange *readRanges(char *fileName, struct hash *hash)
/* Read range file into list/hash.  Each three-column row becomes a
 * sangRange that is added to hash under its name (names must be unique)
 * and to the returned list, which is in file order. */
{
struct sangRange *list = NULL, *el;
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *words[3];

printf("Reading %s\n", fileName);
while (lineFileNextRow(lf, words, 3))
    {
    el = sangRangeLoad(words);
    slAddHead(&list, el);
    hashAddUnique(hash, el->name, el);
    }
lineFileClose(&lf);
slReverse(&list);
return list;
}
struct hash *readBed(char *fileName) /* Read bed and return it as a hash keyed by chromName * with binKeeper values. */ { char *row[5]; struct lineFile *lf = lineFileOpen(fileName, TRUE); struct hash *hash = newHash(0); int expectedCols = bScore ? 5 : 3; while (lineFileNextRow(lf, row, expectedCols)) { struct binKeeper *bk; struct bed5 *bed; struct hashEl *hel = hashLookup(hash, row[0]); if (hel == NULL) { bk = binKeeperNew(0, 1024*1024*1024); hel = hashAdd(hash, row[0], bk); } bk = hel->val; AllocVar(bed); bed->chrom = hel->name; bed->start = lineFileNeedNum(lf, row, 1); bed->end = lineFileNeedNum(lf, row, 2); if (bScore) bed->score = lineFileNeedNum(lf, row, 4); if (bed->start > bed->end) errAbort("start after end line %d of %s", lf->lineIx, lf->fileName); if (bed->start == bed->end) { if (allowStartEqualEnd) // Note we are tweaking binKeeper coords here, so use bed->start and bed->end. binKeeperAdd(bk, max(0, bed->start-1), bed->end+1, bed); else lineFileAbort(lf, "start==end (if this is legit, use -allowStartEqualEnd)"); } else binKeeperAdd(bk, bed->start, bed->end, bed); } lineFileClose(&lf); return hash; }
void bedFileStats(char *bedFile, int colCount, FILE *f) /* Collect stats on sizes of things in a bed file, and scores too. */ { struct lineFile *lf = lineFileOpen(bedFile, TRUE); struct slDouble *sizeList=NULL, *scoreList=NULL, *el; char *row[colCount]; while (lineFileNextRow(lf, row, colCount)) { int size = sqlUnsigned(row[endColIx]) - sqlUnsigned(row[startColIx]); el = slDoubleNew(size); slAddHead(&sizeList, el); double score = sqlDouble(row[scoreColIx]); el = slDoubleNew(score); slAddHead(&scoreList, el); } fprintf(f, "%s\t%d\tsize:", bedFile, slCount(scoreList)); printStats(f, sizeList); fprintf(f, "\tscore:"); printStats(f, scoreList); fprintf(f, "\n"); lineFileClose(&lf); }
static double minOfCol(char *fileName, int colIx)
/* Scan fileName and return the smallest number found in column colIx
 * (zero based).  Aborts when the file has no rows. */
{
int colsNeeded = colIx + 1;
char *row[colsNeeded];
double lowest = 0;
boolean seenRow = FALSE;
struct lineFile *lf = lineFileOpen(fileName, TRUE);

while (lineFileNextRow(lf, row, colsNeeded))
    {
    double cur = lineFileNeedDouble(lf, row, colIx);
    /* The first row always seeds the minimum. */
    if (!seenRow || cur < lowest)
        lowest = cur;
    seenRow = TRUE;
    }
lineFileClose(&lf);

if (!seenRow)
    errAbort("No data in %s", fileName);
return lowest;
}
struct sage *loadSageTags(char *fileName, int numExps)
/* Load sage records from a three-column file (unigene id, description,
 * tag).  Consecutive rows with the same id are merged into one record
 * holding all of their tags; gb and gi are set to "unknown".  Each tag is
 * stored in its own 11 byte buffer, so tags may be at most 10 characters.
 * Records are pushed onto the head of the list, so the returned list is
 * in reverse file order. */
{
enum { tagBufSize = 11 };   /* 10 characters plus terminator */
struct sage *sgList = NULL, *sg = NULL;
char *words[3];
struct lineFile *lf = lineFileOpen(fileName, TRUE);

while (lineFileNextRow(lf, words, 3))
    {
    int uni = atoi(words[0]);
    char *tag = words[2];

    /* Check the length for every tag, not only the first of a record. */
    assert(strlen(tag) < tagBufSize);

    if (sg == NULL || sg->uni != uni)
        {
        /* A new id starts: bank the finished record and begin another. */
        if (sg != NULL)
            slSafeAddHead(&sgList, sg);
        sg = createNewSage(numExps);
        sg->uni = uni;
        snprintf(sg->gb, sizeof(sg->gb), "unknown");
        snprintf(sg->gi, sizeof(sg->gi), "unknown");
        sg->description = cloneString(words[1]);
        sg->numTags = 0;
        sg->tags = needMem(sizeof(char *));
        }
    else
        {
        /* Same id as the previous row: make room for one more tag. */
        sg->tags = needMoreMem(sg->tags, (sg->numTags * sizeof(char *)),
                               ((sg->numTags + 1) * sizeof(char *)));
        }

    /* Bounded copy, so an over-long tag cannot overrun the buffer even
     * when assertions are compiled out. */
    sg->tags[sg->numTags] = needMem(tagBufSize);
    snprintf(sg->tags[sg->numTags], tagBufSize, "%s", tag);
    sg->numTags++;
    }

/* The record still being built at end of file belongs on the list too. */
if (sg != NULL)
    slSafeAddHead(&sgList, sg);
lineFileClose(&lf);
return sgList;
}
struct sangPair *readPairs(char *fileName, struct hash *pairHash, struct hash *rangeHash) /* Read in pair file and connect pairs to relevant range. */ { struct sangPair *list = NULL, *el; struct hashEl *hel; struct sangInsert si; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *words[2]; int wordCount; printf("Reading %s\n", fileName); while (lineFileNextRow(lf, words, 2)) { sangInsertStaticLoad(words, &si); AllocVar(el); hel = hashAddUnique(pairHash, si.id, el); el->name = hel->name; el->range = hashMustFindVal(rangeHash, si.name); slAddHead(&list, el); } slReverse(&list); lineFileClose(&lf); return list; }
int *readInConservationVals(char *fileName) /* Open up the file and read in the conservation scores. return an array indexed by base position with the conservation scores. Free with freez() */ { struct lineFile *lf = NULL; int *scores = NULL; int chromSize = optionInt("chromSize", 0); int i = 0; char *words[2]; if(chromSize == 0) errAbort("Can't have chromSize set to 0."); warn("Reading in conservation"); setMaxAlloc(sizeof(*scores)*chromSize+1); AllocArray(scores, chromSize); /* Make empty data be -1, a not possible score. */ for(i = 0; i < chromSize; i++) scores[i] = -1; /* Open up our conservation file. */ if(sameString(fileName, "stdin")) lf = lineFileStdin(TRUE); else lf = lineFileOpen(fileName, TRUE); dotForUserInit( chromSize/10 > 1 ? chromSize/10 : 1); while(lineFileNextRow(lf, words, ArraySize(words))) { scores[atoi(words[0])] = round(atof(words[1]) * FLOAT_CHEAT); dotForUser(); } lineFileClose(&lf); warn("Done"); return scores; }
void ctgToChromFa(char *chromName, char *insertFile, char *chromDir, char *orderLst, char *outName, struct hash *liftHash) /* ctgToChromFa - convert contig level fa files to chromosome level. */ { struct hash *uniq = newHash(0); struct bigInsert *bi; struct chromInserts *chromInserts; struct hash *insertHash = newHash(9); struct lineFile *lf = lineFileOpen(orderLst, TRUE); FILE *f = mustOpen(outName, "w"); char ctgFaName[512]; char *words[2]; int liftChromSize = 0; int actualChromSize = 0; boolean isFirst = TRUE; chromInsertsRead(insertFile, insertHash); chromInserts = hashFindVal(insertHash, chromName); fprintf(f, ">%s\n", chromName); while (lineFileNextRow(lf, words, 1)) { char *contig = words[0]; int nSize; if (liftHash != NULL) { struct lift *lift = hashMustFindVal(liftHash, contig); nSize = lift->nBefore; liftChromSize = lift->chromSize; } else nSize = chromInsertsGapSize(chromInserts, rmChromPrefix(contig), isFirst); hashAddUnique(uniq, contig, NULL); addN(f, nSize); actualChromSize += nSize; isFirst = FALSE; sprintf(ctgFaName, "%s/%s/%s.fa", chromDir, contig, contig); if (fileExists(ctgFaName)) { actualChromSize += addFa(f, ctgFaName); } else { warn("%s does not exist\n", ctgFaName); if (!cgiVarExists("missOk")) noWarnAbort(); } } lineFileClose(&lf); if (chromInserts != NULL) if ((bi = chromInserts->terminal) != NULL) { addN(f, bi->size); actualChromSize += bi->size; } if (liftHash != NULL) { if (actualChromSize > liftChromSize) errAbort("Error: chromosome size from lift file is %d, but actual fa size is %d. Possible inconsistency between lift and inserts?", liftChromSize, actualChromSize); else if (actualChromSize < liftChromSize) addN(f, (liftChromSize - actualChromSize)); } if (linePos != 0) fputc('\n', f); fclose(f); }
void checkExp(char *bedFileName, char *tNibDir, char *nibList)
/* For each pseudoGeneLink row in bedFileName, score every mRNA that
 * overlaps the retrogene by whether it matches the retrogene or its
 * parent gene at the positions where the two differ.
 *   bedFileName - file of pseudoGeneLink rows (PSEUDOGENELINK_NUM_COLS columns)
 *   tNibDir     - directory handed to nibInfoFromCache for both the
 *                 query and the target sequence of the chain
 *   nibList     - passed through to the pslPretty command line
 * Per row:
 *   1. Look up overlapping items in the globals mrnaHash (retro and
 *      parent loci, reported via verbose only) and chainHash.
 *   2. Take the highest scoring chain whose query side is on ps->gChrom
 *      and intersects the parent range, trim it to the retro and parent
 *      ranges, convert it to axt and collect its mismatches.  Only the
 *      first qualifying chain is used (the loop breaks).
 *   3. Write the overlapping mRNA psls and their sequences (from the
 *      global twoBitFile) to /tmp files named after ps->chrom, run
 *      pslPretty through system(), and read back the resulting axts.
 *   4. For each overlapping mRNA, count mismatch positions where the
 *      mRNA base equals the retro base, the parent base, or neither, and
 *      write one tab-separated line to the global outFile.
 * Aborts if a sequence is missing or pslPretty returns non-zero.
 *
 * NOTE(review): chopByChar is given sizeof(tmpName), which is the byte
 *   size of the pointer array rather than its element count - confirm
 *   against chopByChar's contract.
 * NOTE(review): slCount and matchFound are passed &misMatchList; for
 *   slCount this looks like it should be the list itself - confirm.
 * NOTE(review): seqList and mAxt are function-scope and are not reset
 *   between rows, so they appear to accumulate across rows - confirm.
 * NOTE(review): bk from hashFindVal and op from fopen are used without a
 *   NULL check, and bf is never closed.
 * NOTE(review): the "match neither" message prints mAxt->tSym while the
 *   comparisons use mAxt->qSym - confirm which is intended. */
{ struct lineFile *bf = lineFileOpen(bedFileName , TRUE), *af = NULL; char *row[PSEUDOGENELINK_NUM_COLS] ; struct pseudoGeneLink *ps; char *tmpName[512], cmd[512]; struct axt *axtList = NULL, *axt, *mAxt = NULL; struct dnaSeq *qSeq = NULL, *tSeq = NULL, *seqList = NULL; struct nibInfo *qNib = NULL, *tNib = NULL; FILE *op; int ret; if (nibHash == NULL) nibHash = hashNew(0); while (lineFileNextRow(bf, row, ArraySize(row))) { struct misMatch *misMatchList = NULL; struct binKeeper *bk = NULL; struct binElement *el, *elist = NULL; struct psl *mPsl = NULL, *rPsl = NULL, *pPsl = NULL, *psl ; struct misMatch *mf = NULL; ps = pseudoGeneLinkLoad(row); tmpName[0] = cloneString(ps->name); chopByChar(tmpName[0], '.', tmpName, sizeof(tmpName)); verbose(2,"name %s %s:%d-%d\n", ps->name, ps->chrom, ps->chromStart,ps->chromEnd); /* get expressed retro from hash */ bk = hashFindVal(mrnaHash, ps->chrom); elist = binKeeperFindSorted(bk, ps->chromStart, ps->chromEnd ) ; for (el = elist; el != NULL ; el = el->next) { rPsl = el->val; verbose(2,"retroGene %s %s:%d-%d\n",rPsl->qName, ps->chrom, ps->chromStart,ps->chromEnd); } /* find mrnas that overlap parent gene */ bk = hashFindVal(mrnaHash, ps->gChrom); elist = binKeeperFindSorted(bk, ps->gStart , ps->gEnd ) ; for (el = elist; el != NULL ; el = el->next) { pPsl = el->val; verbose(2,"parent %s %s:%d %d,%d\n", pPsl->qName, pPsl->tName,pPsl->tStart, pPsl->match, pPsl->misMatch); } /* find self chain */ bk = hashFindVal(chainHash, ps->chrom); elist = binKeeperFind(bk, ps->chromStart , ps->chromEnd ) ; slSort(&elist, chainCmpScoreDesc); for (el = elist; el != NULL ; el = el->next) { struct chain *chain = el->val, *subChain, *retChainToFree, *retChainToFree2; int qs = chain->qStart; int qe = chain->qEnd; int id = chain->id; if (chain->qStrand == '-') { qs = chain->qSize - chain->qEnd; qe = chain->qSize - chain->qStart; } if (!sameString(chain->qName , ps->gChrom) || 
!positiveRangeIntersection(qs, qe, ps->gStart, ps->gEnd)) { verbose(2," wrong chain %s:%d-%d %s:%d-%d parent %s:%d-%d\n", chain->qName, qs, qe, chain->tName,chain->tStart,chain->tEnd, ps->gChrom,ps->gStart,ps->gEnd); continue; } verbose(2,"chain id %d %4.0f",chain->id, chain->score); chainSubsetOnT(chain, ps->chromStart+7, ps->chromEnd-7, &subChain, &retChainToFree); if (subChain != NULL) chain = subChain; chainSubsetOnQ(chain, ps->gStart, ps->gEnd, &subChain, &retChainToFree2); if (subChain != NULL) chain = subChain; if (chain->qStrand == '-') { qs = chain->qSize - chain->qEnd; qe = chain->qSize - chain->qStart; } verbose(2," %s:%d-%d %s:%d-%d ", chain->qName, qs, qe, chain->tName,chain->tStart,chain->tEnd); if (subChain != NULL) verbose(2,"subChain %s:%d-%d %s:%d-%d\n", subChain->qName, subChain->qStart, subChain->qEnd, subChain->tName,subChain->tStart,subChain->tEnd); qNib = nibInfoFromCache(nibHash, tNibDir, chain->qName); tNib = nibInfoFromCache(nibHash, tNibDir, chain->tName); tSeq = nibInfoLoadStrand(tNib, chain->tStart, chain->tEnd, '+'); qSeq = nibInfoLoadStrand(qNib, chain->qStart, chain->qEnd, chain->qStrand); axtList = chainToAxt(chain, qSeq, chain->qStart, tSeq, chain->tStart, maxGap, BIGNUM); verbose(2,"axt count %d misMatch cnt %d\n",slCount(axtList), slCount(misMatchList)); for (axt = axtList; axt != NULL ; axt = axt->next) { addMisMatch(&misMatchList, axt, chain->qSize); } verbose(2,"%d in mismatch list %s id %d \n",slCount(misMatchList), chain->qName, id); chainFree(&retChainToFree); chainFree(&retChainToFree2); break; } /* create axt of each expressed retroGene to parent gene */ /* get alignment for each mrna overlapping retroGene */ bk = hashFindVal(mrnaHash, ps->chrom); elist = binKeeperFindSorted(bk, ps->chromStart , ps->chromEnd ) ; { char queryName[512]; char axtName[512]; char pslName[512]; safef(queryName, sizeof(queryName), "/tmp/query.%s.fa", ps->chrom); safef(axtName, sizeof(axtName), "/tmp/tmp.%s.axt", ps->chrom); safef(pslName, 
sizeof(pslName), "/tmp/tmp.%s.psl", ps->chrom); op = fopen(pslName,"w"); for (el = elist ; el != NULL ; el = el->next) { psl = el->val; pslOutput(psl, op, '\t','\n'); qSeq = twoBitReadSeqFrag(twoBitFile, psl->qName, 0, 0); if (qSeq != NULL) slAddHead(&seqList, qSeq); else errAbort("seq %s not found \n", psl->qName); } fclose(op); faWriteAll(queryName, seqList); safef(cmd,sizeof(cmd),"pslPretty -long -axt %s %s %s %s",pslName , nibList, queryName, axtName); ret = system(cmd); if (ret != 0) errAbort("ret is %d %s\n",ret,cmd); verbose(2, "ret is %d %s\n",ret,cmd); af = lineFileOpen(axtName, TRUE); while ((axt = axtRead(af)) != NULL) slAddHead(&mAxt, axt); lineFileClose(&af); } slReverse(&mAxt); /* for each parent/retro pair, count bases matching retro and parent better */ for (el = elist; el != NULL ; el = el->next) { int i, scoreRetro=0, scoreParent=0, scoreNeither=0; struct dyString *parentMatch = newDyString(16*1024); struct dyString *retroMatch = newDyString(16*1024); mPsl = el->val; if (mAxt != NULL) { verbose(2,"mrna %s %s:%d %d,%d axt %s\n", mPsl->qName, mPsl->tName,mPsl->tStart, mPsl->match, mPsl->misMatch, mAxt->qName); assert(sameString(mPsl->qName, mAxt->qName)); for (i = 0 ; i< (mPsl->tEnd-mPsl->tStart) ; i++) { int j = mAxt->tStart - mPsl->tStart; verbose(5, "listLen = %d\n",slCount(&misMatchList)); if ((mf = matchFound(&misMatchList, (mPsl->tStart)+i)) != NULL) { if (toupper(mf->retroBase) == toupper(mAxt->qSym[j+i])) { verbose (3,"match retro[%d] %d %c == %c parent %c %d\n", i,mf->retroLoc, mf->retroBase, mAxt->qSym[j+i], mf->parentBase, mf->parentLoc); dyStringPrintf(retroMatch, "%d,", mf->retroLoc); scoreRetro++; } else if (toupper(mf->parentBase) == toupper(mAxt->qSym[j+i])) { verbose (3,"match parent[%d] %d %c == %c retro %c %d\n", i,mf->parentLoc, mf->parentBase, mAxt->qSym[j+i], mf->retroBase, mf->retroLoc); dyStringPrintf(parentMatch, "%d,", mf->parentLoc); scoreParent++; } else { verbose (3,"match neither[%d] %d %c != %c retro %c %d\n", 
i,mf->parentLoc, mf->parentBase, mAxt->tSym[j+i], mf->retroBase, mf->retroLoc); scoreNeither++; } } } verbose(2,"final score %s parent %d retro %d neither %d\n", mPsl->qName, scoreParent, scoreRetro, scoreNeither); fprintf(outFile,"%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%s\t%d\t%d\t%d\t%s\t%s\n", ps->chrom, ps->chromStart, ps->chromEnd, ps->name, ps->score, mPsl->tName, mPsl->tStart, mPsl->tEnd, mPsl->qName, scoreParent, scoreRetro, scoreNeither, parentMatch->string, retroMatch->string); mAxt = mAxt->next; } dyStringFree(&parentMatch); dyStringFree(&retroMatch); } } }
void ticksToWig(int startTick, char *inTable, char *outDensity, char *outAverage) /* ticksToWig - Convert tab-delimited file of Unix time ticks, and possibly also * numerical values to wig file(s).. */ { struct lineFile *lf = lineFileOpen(inTable, TRUE); FILE *densityFile = mustOpen(outDensity, "w"); printVarStepHead(densityFile); FILE *averageFile = NULL; if (outAverage != NULL) { averageFile = mustOpen(outAverage, "w"); printVarStepHead(averageFile); } int colsToParse = 1 + max(tickCol, valCol); char *row[colsToParse]; time_t curTick = 0; int sameTickCount = 0; double tickTotal = 0; double val = 0; time_t tick; while (lineFileNextRow(lf, row, colsToParse)) { tick = lineFileNeedNum(lf, row, tickCol); if (averageFile != NULL) val = lineFileNeedDouble(lf, row, valCol); if (curTick != tick) { if (curTick > tick) errAbort("Input isn't sorted - %ld > %ld line %d of %s\n", (long)curTick, (long)tick, lf->lineIx, lf->fileName); if (startTick == 0) startTick = tick; if (sameTickCount > 0) { fprintf(densityFile, "%ld\t%d\n", curTick - startTick + 1, sameTickCount); time_t i; for (i=curTick+1; i<tick; ++i) { fprintf(densityFile, "%ld\t%d\n", i - startTick + 1, 0); } if (averageFile != NULL) { fprintf(averageFile, "%ld\t%f\n", (long)curTick - startTick + 1, tickTotal/sameTickCount); tickTotal = 0; } sameTickCount = 0; } curTick = tick; } tickTotal += val; sameTickCount += 1; } if (sameTickCount > 0) { fprintf(densityFile, "%ld\t%d\n", curTick - startTick + 1, sameTickCount); if (averageFile != NULL) fprintf(averageFile, "%ld\t%f\n", (long)curTick - startTick + 1, tickTotal/sameTickCount); } carefulClose(&densityFile); carefulClose(&averageFile); }
void rcvs(char *codingTable, char *clusterTable) /* rcvs - Compare riken noncoding vs. nonspliced. */ { struct hash *idHash = newHash(16); // Key id1, val id2 struct hash *nonCodingHash = newHash(16); // Key clusterId, value struct hash *splicedHash = newHash(16); // Key id2, present if spliced struct sqlConnection *conn = sqlConnect("mgsc"); struct sqlResult *sr; char **row; char *words[16]; int wordCount; struct lineFile *lf; int codingSpliced = 0; int noncodingSpliced = 0; int codingNonspliced = 0; int noncodingNonspliced = 0; /* Read id's into hash */ sr = sqlGetResult(conn, NOSQLINJ "select id1,id2 from rikenIds"); while ((row = sqlNextRow(sr)) != NULL) hashAdd(idHash, row[0], cloneString(row[1])); sqlFreeResult(&sr); /* Read spliced into hash */ sr = sqlGetResult(conn, NOSQLINJ "select name from rikenOrientInfo where intronOrientation != 0"); while ((row = sqlNextRow(sr)) != NULL) hashAdd(splicedHash, row[0], NULL); sqlFreeResult(&sr); /* Read noncoding clusters into hash */ lf = lineFileOpen(codingTable, TRUE); while (lineFileNextRow(lf, words, 2)) { if (sameString(words[1], "NoPProt")) hashAdd(nonCodingHash, words[0], NULL); } lineFileClose(&lf); /* Stream through cluster table counting and correlating. */ lf = lineFileOpen(clusterTable, TRUE); while (lineFileNextRow(lf, words, 2)) { char *cluster = words[0]; char *id1 = words[1]; char *id2 = hashMustFindVal(idHash, id1); if (hashLookup(nonCodingHash, cluster)) { if (hashLookup(splicedHash, id2)) ++noncodingSpliced; else ++noncodingNonspliced; } else { if (hashLookup(splicedHash, id2)) ++codingSpliced; else ++codingNonspliced; } } printf("noncodingNonspliced %d\n", noncodingNonspliced); printf("noncodingSpliced %d\n", noncodingSpliced); printf("codingNonspliced %d\n", codingNonspliced); printf("codingSpliced %d\n", codingSpliced); printf("total %d\n", noncodingNonspliced + noncodingSpliced + codingNonspliced + codingSpliced); }
void bedItemOverlapCount(struct hash *chromHash, char *infile, char *outfile){ unsigned maxChromSize = 0; unitSize *counts = (unitSize *)NULL; FILE *f = mustOpen(outfile, "w"); struct hashCookie hc = hashFirst(chromHash); struct hashEl *hel; while( (hel = hashNext(&hc)) != NULL) { unsigned num = (unsigned) ptToInt(hel->val); maxChromSize = max(num, maxChromSize); } verbose(2,"#\tmaxChromSize: %u\n", maxChromSize); if (maxChromSize < 1) errAbort("maxChromSize is zero ?"); /* Allocate just once for the largest chrom and reuse this array */ counts = needHugeMem(sizeof(unitSize) * maxChromSize); /* Reset the array to be zero to be reused */ memset((void *)counts, 0, sizeof(unitSize)*(size_t)maxChromSize); unsigned chromSize = 0; char *prevChrom = (char *)NULL; boolean outputToDo = FALSE; struct hash *seenHash = newHash(5); struct lineFile *bf = lineFileOpen(infile , TRUE); struct bed *bed = (struct bed *)NULL; char *row[12]; int numFields = doBed12 ? 12 : 3; while (lineFileNextRow(bf,row, numFields)) { int i; bed = bedLoadN(row, numFields); verbose(3,"#\t%s\t%d\t%d\n",bed->chrom,bed->chromStart, bed->chromEnd); if (prevChrom && differentWord(bed->chrom,prevChrom)) // End a chr { verbose(2,"#\tchrom %s done, size %d\n", prevChrom, chromSize); if (outputToDo) outputCounts(counts, prevChrom, chromSize, f); outputToDo = FALSE; memset((void *)counts, 0, sizeof(unitSize)*(size_t)maxChromSize); /* zero counts */ freez(&prevChrom); // prevChrom is now NULL so it will be caught by next if! } if ((char *)NULL == prevChrom) // begin a chr { if (hashLookup(seenHash, bed->chrom)) errAbort("ERROR:input file not sorted. 
%s seen before on line %d\n", bed->chrom, bf->lineIx); hashAdd(seenHash, bed->chrom, NULL); prevChrom = cloneString(bed->chrom); chromSize = hashIntVal(chromHash, prevChrom); verbose(2,"#\tchrom %s starting, size %d\n", prevChrom,chromSize); } if (bed->chromEnd > chromSize) { // check for circular chrM if (doBed12 || bed->chromStart>=chromSize || differentWord(bed->chrom,"chrM")) { warn("ERROR: %s\t%d\t%d", bed->chrom, bed->chromStart, bed->chromEnd); errAbort("chromEnd > chromSize ? %d > %d", bed->chromEnd,chromSize); } for (i = bed->chromStart; i < chromSize; ++i) INCWOVERFLOW(counts,i); for (i = 0; i < (bed->chromEnd - chromSize); ++i) INCWOVERFLOW(counts,i); } else if (doBed12) { int *starts = bed->chromStarts; int *sizes = bed->blockSizes; int *endStarts = &bed->chromStarts[bed->blockCount]; for(; starts < endStarts; starts++, sizes++) { unsigned int end = *starts + *sizes + bed->chromStart; for (i = *starts + bed->chromStart; i < end; ++i) INCWOVERFLOW(counts,i); } } else { for (i = bed->chromStart; i < bed->chromEnd; ++i) INCWOVERFLOW(counts, i); } outputToDo = TRUE; bedFree(&bed); // plug the memory leak } lineFileClose(&bf); // Note, next file could be on same chr! if (outputToDo) outputCounts(counts, prevChrom, chromSize, f); if (doOutBounds) fprintf(stderr, "min %lu max %lu\n", (unsigned long)overMin, (unsigned long)overMax); verbose(2,"#\tchrom %s done, size %d\n", prevChrom, chromSize); carefulClose(&f); freeMem(counts); freez(&prevChrom); // hashFreeWithVals(&chromHash, freez); freeHash(&seenHash); }