int main(int argc, char *argv[])
/* fixCr - for every file named on the command line, move it to
 * <dir><name>.bak and write it back under its original name with the
 * carriage return in front of each line-ending newline removed. */
{
    char dirPart[256], namePart[128], extPart[64];
    char backupPath[512];
    int argIx;

    if (argc < 2)
        errAbort("fixCr - strip <CR>s from ends of lines");

    for (argIx = 1; argIx < argc; ++argIx) {
        char *path = argv[argIx];
        struct lineFile *in;
        FILE *dest;
        char *text;
        int textSize;

        /* Progress output: one name per file, flushed right away. */
        printf("%s ", path);
        fflush(stdout);

        /* Build the backup name, drop any stale backup, move the input aside. */
        splitPath(path, dirPart, namePart, extPart);
        sprintf(backupPath, "%s%s%s", dirPart, namePart, ".bak");
        remove(backupPath);
        rename(path, backupPath);

        in = lineFileOpen(backupPath, FALSE);
        dest = mustOpen(path, "w");
        while (lineFileNext(in, &text, &textSize)) {
            /* Look at the byte just before the final one of the line. */
            if (textSize > 1 && text[textSize - 2] == '\r') {
                text[textSize - 2] = '\n';
                --textSize;
            }
            mustWrite(dest, text, textSize);
        }
        fclose(dest);
        lineFileClose(&in);
    }
    printf("\n");
    return 0;
}
struct cartDb *cartDbLoadAll(char *fileName)
/* Read every 6-column row of fileName into a cartDb and return the
 * records as a list in file order.
 * Dispose of this with cartDbFreeList(). */
{
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    struct cartDb *head = NULL;
    char *row[6];

    for (;;) {
        struct cartDb *rec;
        if (!lineFileRow(lf, row))
            break;
        rec = cartDbLoad(row);
        slAddHead(&head, rec);
    }
    /* Records were pushed on the front, so flip to restore file order. */
    slReverse(&head);
    lineFileClose(&lf);
    return head;
}
struct rhMapZfishInfo *rhMapZfishInfoLoadAll(char *fileName)
/* Read every 10-column row of fileName into a rhMapZfishInfo and return
 * the records as a list in file order.
 * Dispose of this with rhMapZfishInfoFreeList(). */
{
    struct rhMapZfishInfo *records = NULL;
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    char *row[10];

    while (lineFileRow(lf, row)) {
        struct rhMapZfishInfo *item = rhMapZfishInfoLoad(row);
        slAddHead(&records, item);
    }
    lineFileClose(&lf);

    /* Head insertion reversed the order; put it back. */
    slReverse(&records);
    return records;
}
struct rhMapZfishInfo *rhMapZfishInfoLoadAllByChar(char *fileName, char chopper)
/* Read every row of fileName, split into 10 fields on the chopper
 * character, into a rhMapZfishInfo; return the records in file order.
 * Dispose of this with rhMapZfishInfoFreeList(). */
{
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    struct rhMapZfishInfo *head = NULL;
    char *row[10];

    for (;;) {
        struct rhMapZfishInfo *rec;
        if (!lineFileNextCharRow(lf, chopper, row, ArraySize(row)))
            break;
        rec = rhMapZfishInfoLoad(row);
        slAddHead(&head, rec);
    }
    slReverse(&head);   /* undo the head-insertion order */
    lineFileClose(&lf);
    return head;
}
struct gtexGeneBed *gtexGeneBedLoadAllByChar(char *fileName, char chopper)
/* Read every row of fileName, split into 11 fields on the chopper
 * character, into a gtexGeneBed; return the records in file order.
 * Dispose of this with gtexGeneBedFreeList(). */
{
    struct gtexGeneBed *records = NULL;
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    char *row[11];

    while (lineFileNextCharRow(lf, chopper, row, ArraySize(row))) {
        struct gtexGeneBed *item = gtexGeneBedLoad(row);
        slAddHead(&records, item);
    }
    lineFileClose(&lf);

    /* Items were prepended while reading; reverse to match the file. */
    slReverse(&records);
    return records;
}
struct gtexGeneBed *gtexGeneBedLoadAll(char *fileName)
/* Read every 11-column row of fileName into a gtexGeneBed and return
 * the records as a list in file order.
 * Dispose of this with gtexGeneBedFreeList(). */
{
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    struct gtexGeneBed *head = NULL;
    char *row[11];

    for (;;) {
        struct gtexGeneBed *rec;
        if (!lineFileRow(lf, row))
            break;
        rec = gtexGeneBedLoad(row);
        slAddHead(&head, rec);
    }
    slReverse(&head);   /* restore file order after head insertion */
    lineFileClose(&lf);
    return head;
}
struct tfbsConsFactors *tfbsConsFactorsLoadAllByChar(char *fileName, char chopper)
/* Read every row of fileName, split into 5 fields on the chopper
 * character, into a tfbsConsFactors; return the records in file order.
 * Dispose of this with tfbsConsFactorsFreeList(). */
{
    struct tfbsConsFactors *records = NULL;
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    char *row[5];

    while (lineFileNextCharRow(lf, chopper, row, ArraySize(row))) {
        struct tfbsConsFactors *item = tfbsConsFactorsLoad(row);
        slAddHead(&records, item);
    }
    lineFileClose(&lf);

    /* Prepending reversed the order; flip it back. */
    slReverse(&records);
    return records;
}
struct tfbsConsFactors *tfbsConsFactorsLoadAll(char *fileName)
/* Read every 5-column row of fileName into a tfbsConsFactors and return
 * the records as a list in file order.
 * Dispose of this with tfbsConsFactorsFreeList(). */
{
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    struct tfbsConsFactors *head = NULL;
    char *row[5];

    for (;;) {
        struct tfbsConsFactors *rec;
        if (!lineFileRow(lf, row))
            break;
        rec = tfbsConsFactorsLoad(row);
        slAddHead(&head, rec);
    }
    slReverse(&head);   /* restore file order after head insertion */
    lineFileClose(&lf);
    return head;
}
struct arcogdesc *arcogdescLoadAll(char *fileName)
/* Read every 3-column row of fileName into an arcogdesc and return the
 * records as a list in file order.
 * Dispose of this with arcogdescFreeList(). */
{
    struct arcogdesc *records = NULL;
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    char *row[3];

    while (lineFileRow(lf, row)) {
        struct arcogdesc *item = arcogdescLoad(row);
        slAddHead(&records, item);
    }
    lineFileClose(&lf);

    /* Items were prepended while reading; reverse to match the file. */
    slReverse(&records);
    return records;
}
static void readTrans(char *transFile, char *faDir, struct hash *cloneHash, struct hash *fragHash) /* Read in transFile into hashes. */ { struct lineFile *lf = lineFileOpen(transFile, TRUE); char *row[3]; char *parts[3], *subParts[2]; int partCount, subCount; char faName[512]; struct clone *clone; struct frag *frag; printf("Reading %s\n", transFile); while (lineFileRow(lf, row)) { char *cloneName = row[1]; char *e = strchr(cloneName, '~'); if (e == NULL) errAbort("Missing ~ line %d of %s", lf->lineIx, lf->fileName); *e++ = 0; if ((clone = hashFindVal(cloneHash, cloneName)) == NULL) { AllocVar(clone); hashAddSaveName(cloneHash, cloneName, clone, &clone->name); chopSuffix(cloneName); sprintf(faName, "%s/%s.fa", faDir, cloneName); cloneName = NULL; clone->faFile = cloneString(faName); } AllocVar(frag); hashAddSaveName(fragHash, row[0], frag, &frag->name); partCount = chopString(row[2], "(:)", parts, ArraySize(parts)); if (partCount != 2) errAbort("Expecting (ACCESSION.VER:START..STOP) line %d of %s", lf->lineIx, lf->fileName); subCount = chopString(parts[1], ".", subParts, ArraySize(subParts)); if (subCount != 2) errAbort("Expecting START..STOP line %d of %s", lf->lineIx, lf->fileName); frag->start = atoi(subParts[0])-1; frag->end = atoi(subParts[1]); frag->clone = clone; if (clone->size < frag->end) clone->size = frag->end; slAddTail(&clone->fragList, frag); } lineFileClose(&lf); }
struct hash *getChromLimits(char *database) /* Get hash full of chromosome limits. */ { struct sqlConnection *conn = hAllocConn(database); struct sqlResult *sr; char **row; struct hash *hash = newHash(8); struct chromLimit *clList = NULL, *cl; double sum = 0; char *limitFile = optionVal("chromLimit", NULL); /* Read in chromosome info from database. */ sr = sqlGetResult(conn, "NOSQLINJ select chrom,size from chromInfo"); while ((row = sqlNextRow(sr)) != NULL) { AllocVar(cl); hashAddSaveName(hash, row[0], cl, &cl->name); cl->size = atoi(row[1]); sum += cl->size; slAddHead(&clList, cl); } sqlFreeResult(&sr); hFreeConn(&conn); /* Calculate max picks. */ for (cl = clList; cl != NULL; cl = cl->next) { cl->maxPicks = round(60.0*cl->size/sum); } /* Override max picks based on chromLimits file if any. */ if (limitFile != NULL) { struct lineFile *lf = lineFileOpen(limitFile, TRUE); char *row[2]; while (lineFileRow(lf, row)) { cl = hashFindVal(hash, row[0]); cl->maxPicks = lineFileNeedNum(lf, row, 1); } lineFileClose(&lf); } return hash; }
void chainMergeSort(int fileCount, char *files[], FILE *out, int level) /* chainMergeSort - Combine sorted files into larger sorted file. */ { int i; struct chainFile *cf; int id = 0; struct quickHeap *h = NULL; h = newQuickHeap(fileCount, &cmpChainScores); /* Open up all input files and read first chain. */ for (i=0; i<fileCount; ++i) { AllocVar(cf); cf->lf = lineFileOpen(files[i], TRUE); lineFileSetMetaDataOutput(cf->lf, out); cf->chain = chainRead(cf->lf); if (cf->chain) addToQuickHeap(h, cf); else cfEof(&cf,level); /* deal with EOF */ } while (!quickHeapEmpty(h)) { cf = peekQuickHeapTop(h); if (!saveId) cf->chain->id = ++id; /* We reset id's here. */ chainWrite(cf->chain, out); chainFree(&cf->chain); if ((cf->chain = chainRead(cf->lf))) { quickHeapTopChanged(h); } else { /* deal with EOF */ if (!removeFromQuickHeapByElem(h, cf)) errAbort("unexpected error: chainFile not found on heap"); cfEof(&cf,level); } } freeQuickHeap(&h); }
static struct hash *readLift(char *liftAcross) /* read in liftAcross file, create hash of srcName as hash key, * hash elements are simple lists of coordinate relationships * return them all sorted by start position */ { char *row[6]; struct hash *result = newHash(8); struct hashEl *hel = NULL; struct lineFile *lf = lineFileOpen(liftAcross, TRUE); while (lineFileNextRow(lf, row, ArraySize(row))) { struct liftSpec *liftSpec; hel = hashStore(result, row[0]); /* srcName hash */ AllocVar(liftSpec); liftSpec->start = sqlUnsigned(row[1]); /* src start */ liftSpec->end = sqlUnsigned(row[2]); /* src end */ liftSpec->dstName = cloneString(row[3]); /* dstName */ liftSpec->dstStart = sqlUnsigned(row[4]); /* dst start */ liftSpec->strand = '+'; /* dst strand */ if ('-' == *row[5]) liftSpec->strand = '-'; /* accumulate list of lift specs under the srcName hash */ slAddHead(&(hel->val), liftSpec); } /* Go through each srcName in the hash, and sort the list there by * the start coordinate of each item. The searching will expect * them to be in order. */ struct hashCookie cookie = hashFirst(result); while ((hel = hashNext(&cookie)) != NULL) { slSort(&(hel->val), lsStartCmp); if (verboseLevel() > 2) { struct liftSpec *ls; for (ls = hel->val; ls != NULL; ls = ls->next) verbose(3, "# %s\t%d\t%d\t%s\t%d\t%c\n", hel->name, ls->start, ls->end, ls->dstName, ls->dstStart, ls->strand); } } return result; }
struct rgi *readRgi(char *inName)
/* Read inName into a list of rgi records, one per non-empty line, in file
 * order.  Every line must chop into exactly 4 words.  Each record is also
 * reported through uglyf as it is loaded. */
{
    struct lineFile *lf = lineFileOpen(inName, TRUE);
    struct rgi *head = NULL;
    char *words[8];

    for (;;) {
        struct rgi *rec;
        int wordCount = lineFileChop(lf, words);
        if (wordCount == 0)
            break;
        lineFileExpectWords(lf, 4, wordCount);
        rec = rgiLoad(words);
        slAddHead(&head, rec);
        uglyf("%s %s: min %d, max %d\n", rec->a, rec->b, rec->minDistance, rec->maxDistance);
    }
    lineFileClose(&lf);

    /* Records were pushed on the front; flip to file order. */
    slReverse(&head);
    return head;
}
void doIt(char *inName, char *tNibDirOr2bit, char *qNibDirOr2bit, char *outName) /* chainToAxt - Convert from chain to axt file. */ { struct lineFile *lf = lineFileOpen(inName, TRUE); struct nibTwoCache *tSeqCache = nibTwoCacheNew(tNibDirOr2bit); struct nibTwoCache *qSeqCache = nibTwoCacheNew(qNibDirOr2bit); struct chain *chain = NULL; FILE *f = mustOpen(outName, "w"); while ((chain = chainRead(lf)) != NULL) { if (chain->score >= minScore) doAChain(chain, tSeqCache, qSeqCache, f); chainFree(&chain); } lineFileClose(&lf); carefulClose(&f); }
void makeInfoPai(boolean isEst, char *inName, char *outName)
/* Make a pair based on .info file - which has two fields:
 *    accession clone
 * For each row the accession has its suffix chopped off and is passed to
 * addBac together with the clone.  Only BAC files are handled: aborts when
 * isEst is set.  Also aborts when an accession does not fit the local
 * buffer.  outName is not used by this function. */
{
    struct lineFile *lf = lineFileOpen(inName, TRUE);
    char *row[2];
    char acc[128];

    if (isEst)
        errAbort("Currently only handle BAC .info files.");
    while (lineFileRow(lf, row)) {
        /* Input is external data: refuse anything that would overrun acc. */
        if (strlen(row[0]) >= sizeof(acc))
            errAbort("Accession %s too long line %d of %s", row[0], lf->lineIx, lf->fileName);
        strcpy(acc, row[0]);
        chopSuffix(acc);
        addBac(lf, acc, row[1]);
    }
    lineFileClose(&lf);
}
void getMachines(char *filename) /* Read in list of machines to use. */ { struct lineFile *lf = lineFileOpen(filename, TRUE); char *line; int lineSize; struct machine *machine; while (lineFileNext(lf, &line, &lineSize)) { AllocVar(machine); // is this equivalent to slAddHead? machine->name = line; machine->next = machineList; machineList = machine; } /* could reverse order here */ }
void axtIndex(char *in, char *out)
/* axtIndex - Create summary file for axt.
 * For every alignment in the axt file `in`, writes one line to `out` with
 * the target start, the target length (tEnd - tStart), and the file
 * position at which the alignment record was read. */
{
    struct lineFile *lf = lineFileOpen(in, TRUE);
    FILE *f = mustOpen(out, "w");
    struct axt *axt;

    for (;;) {
        /* Capture the position before reading so it marks the record start. */
        off_t pos = lineFileTell(lf);
        axt = axtRead(lf);
        if (axt == NULL)
            break;
        /* The cast matches the signed %lld conversion. */
        fprintf(f, "%d %d %lld\n", axt->tStart, axt->tEnd - axt->tStart, (long long) pos);
        axtFree(&axt);
    }
    lineFileClose(&lf);
    carefulClose(&f);
}
struct hash *readOrfToGene(char *fileName)
/* Read the two column orf/gene file fileName and return a hash keyed by
 * orf whose values are freshly allocated copies of the gene names.
 * Aborts unless every orf contains a '.' and every gene contains a '-'. */
{
    struct hash *orfToGene = newHash(16);
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    char *row[2];

    while (lineFileRow(lf, row)) {
        char *orfName = row[0];
        char *geneName = row[1];
        /* Format sanity check on both columns. */
        if (!(strchr(orfName, '.') && strchr(geneName, '-')))
            errAbort("%s doesn't seem to be in ORF<tab>gene<CR> format", fileName);
        hashAdd(orfToGene, orfName, cloneString(geneName));
    }
    lineFileClose(&lf);
    return orfToGene;
}
struct hash *readStrand(char *strandFile)
/* Read the strands from a file.
 * Each row of strandFile has two fields: name and strand.  Returns a hash
 * keyed by name whose values are struct strand; strand->name is the name
 * pointer saved by hashAddSaveName and strand->strand is a private copy of
 * the second field. */
{
    struct hash *strandHash = newHash(16);
    struct lineFile *lf = lineFileOpen(strandFile, TRUE);   /* input file */
    char *row[2];                                           /* number of fields in input file */

    while (lineFileRow(lf, row)) {                          /* process one snp at a time */
        struct strand *strand;
        AllocVar(strand);
        strand->strand = cloneString(row[1]);
        /* hashAddSaveName fills in strand->name itself, so the row text is
         * passed directly; cloning it first would only leak the copy. */
        hashAddSaveName(strandHash, row[0], strand, &strand->name);
    }
    lineFileClose(&lf);
    return strandHash;
}
struct affyPairs *affyPairsLoadAll(char *fileName)
/* Read every 12-column row of fileName after the first into an affyPairs
 * and return the records as a list in file order.  The first row is read
 * and discarded as a header.
 * Dispose of this with affyPairsFreeList(). */
{
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    struct affyPairs *head = NULL;
    char *row[12];

    /* Throw away the one-line header. */
    lineFileRow(lf, row);

    for (;;) {
        struct affyPairs *rec;
        if (!lineFileRow(lf, row))
            break;
        rec = affyPairsLoad(row);
        slAddHead(&head, rec);
    }
    slReverse(&head);   /* restore file order after head insertion */
    lineFileClose(&lf);
    return head;
}
void countSeq(char *fileName, int *retSeqCount, int *retBaseCount)
/* Count sequences and bases in the fa file fileName.
 * *retSeqCount receives the number of records read and *retBaseCount the
 * sum of their sizes, both stored once the whole file has been read. */
{
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    int records = 0;
    int bases = 0;
    int recordSize;
    DNA *dna;
    char *name;

    for (;;) {
        if (!faSpeedReadNext(lf, &dna, &recordSize, &name))
            break;
        ++records;
        bases += recordSize;
    }
    lineFileClose(&lf);

    *retSeqCount = records;
    *retBaseCount = bases;
}
struct hash *readSizes(char *fileName)
/* Read the two-column file fileName into a hash keyed by the name in the
 * first column, with the number in the second column stored as the value
 * via intToPt. */
{
    struct hash *sizes = newHash(0);
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    char *row[2];

    for (;;) {
        int size;
        if (!lineFileRow(lf, row))
            break;
        size = lineFileNeedNum(lf, row, 1);
        /* trust the user to not have duplicated names in the lengths file */
        hashAdd(sizes, row[0], intToPt(size));
    }
    lineFileClose(&lf);
    return sizes;
}
void rmskOut2OpenVerify(char *fileName, struct lineFile **retFile, boolean *retEmpty)
/* Open repeat masker .out file and verify that it is good.
 * *retFile receives the open file.  *retEmpty is set to TRUE when the first
 * line starts with "There were no", the header characteristic of an empty
 * file; nothing more is read in that case.  Otherwise the first line, after
 * leading spaces, must start with "SW" or "bit" (else this aborts), two more
 * lines are skipped, and *retEmpty is set to FALSE. */
{
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    char *line;
    int lineSize;

    lineFileNeedNext(lf, &line, &lineSize);
    if (startsWith("There were no", line)) {
        /* Empty-result header: not an error, and there is no column header
         * to validate or skip. */
        *retEmpty = TRUE;
    } else {
        line = skipLeadingSpaces(line);
        if (!(startsWith("SW", line) || startsWith("bit", line)))
            errAbort("%s doesn't seem to be a RepeatMasker .out file", fileName);
        lineFileSkip(lf, 2);
        *retEmpty = FALSE;
    }
    *retFile = lf;
}
void tagToBed12(char *pairTagFile, char *bedFile) /* tagToBed12 - Convert tagAlign format to bed 12 + 2. */ { struct lineFile *lf = lineFileOpen(pairTagFile, TRUE); FILE *f = mustOpen(bedFile, "w"); struct tagAlign *pt; struct bed12wSeq bed; char *row[6]; int blockSizes[1]; int chromStarts[1]; chromStarts[0] = 0; bed.chromStarts = chromStarts; bed.blockSizes = blockSizes; bed.blockCount = 1; bed.strand[1] = 0; while (lineFileRow(lf, row)) { pt = tagAlignLoad(row); bed.chrom = pt->chrom; bed.chromStart = pt->chromStart; bed.thickStart = pt->chromStart; bed.chromEnd = pt->chromEnd; bed.thickEnd = pt->chromEnd; bed.name = pt->sequence; bed.score = pt->score; bed.strand[0] = pt->strand; bed.seq1 = pt->sequence; bed.seq2 = "X"; bed.reserved = 0; blockSizes[0] = strlen(pt->sequence); assert(bed.chromEnd = bed.chromStart + blockSizes[0]); bed12wSeqOutput(&bed, f, '\t', '\n'); tagAlignFree(&pt); } lineFileClose(&lf); }
void convertCelFile(struct lqRecord ***lqMatrix, struct hash *nmerHash, char *celFile, char *outputFormat) { struct lineFile *lf = lineFileOpen(celFile, TRUE); char *line=NULL; int lineSize, x, y, i,j; char *words[5]; struct lqRecord *pm = NULL, *mm=NULL; char *outFile = needMem(sizeof(char)*(strlen(celFile)+5)); FILE *out = NULL; int numCols=0, numRows=0; struct cel ***celMatrix = NULL; safef(outFile, strlen(celFile)+5, "%s.tab", celFile); out = mustOpen(outFile, "w"); parseCelRowsCols(lf, &numRows, &numCols); if(numCols == 0 || numRows == 0) errAbort("Couldn't find a 'Cols=' or a 'Rows=' in %s, is this a cel file?\n", celFile); /* Allocate the matrix. */ celMatrix = needMem(sizeof(struct lqRecord *)*numRows); for(i=0; i<numRows; i++) celMatrix[i] = needMem(sizeof(struct lqRecord *)*numCols); while(lineFileNextRowTab(lf, words, 5)) { struct cel *cel = parseCelRow(words); celMatrix[cel->x][cel->y] = cel; if(cel->x + 1 == numRows && cel->y +1 == numCols) break; } outputPairsFile(out, numRows, numCols, lqMatrix, celMatrix); /* Cleanup. */ for(i=0;i<numRows;i++) for(j=0; j<numCols; j++) if(celMatrix[i][j] != NULL) freez(&celMatrix[i][j]); for(i=0; i<numRows; i++) freez(&celMatrix[i]); freez(&celMatrix); lineFileClose(&lf); freez(&outFile); carefulClose(&out); }
struct hash *loadGenePositions(char *database, struct sqlConnection *conn, char *fileName) /* Read in 7 column file and convert to hash of gene * positions. */ { struct hash *hash = newHash(16); struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[7]; int count = 0; struct genomePos *posList = NULL, *pos; while (lineFileRow(lf, row)) { int geneStart,geneEnd,upSize,downSize; AllocVar(pos); hashAddSaveName(hash, row[0], pos, &pos->name); slAddHead(&posList, pos); pos->chrom = hgOfficialChromName(database, row[1]); if (pos->chrom == NULL) errAbort("Unrecognized chromosome %s line %d of %s", row[1], lf->lineIx, lf->fileName); geneStart = lineFileNeedNum(lf, row, 2); geneEnd = lineFileNeedNum(lf, row, 3); pos->strand = row[4][0]; if (pos->strand != '+' && pos->strand != '-') errAbort("Unrecognized strand %s line %d of %s", row[4], lf->lineIx, lf->fileName); upSize = lineFileNeedNum(lf, row, 5); downSize = lineFileNeedNum(lf, row, 6); if (pos->strand == '+') { pos->start = geneStart - upSize; pos->end = geneStart + downSize; } else { pos->start = geneEnd - downSize; pos->end = geneEnd + upSize; } ++count; } verbose(1, "%d genes in %s\n", count, fileName); return hash; }
void aveNoQuartiles(char *fileName) /* aveNoQuartiles - Compute only min,max,mean,stdDev no quartiles */ { bits64 count = 0; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *words[128], *word; int wordCount; int wordIx = col-1; double sumData = 0.0, sumSquares = 0.0; double minVal = DBL_MAX, maxVal = -DBL_MAX; while ((wordCount = lineFileChop(lf, words)) > 0) { word = words[wordIx]; if (word[0] == '-' || isdigit(word[0])) { double val = sqlDouble(word); if (minVal > val) minVal = val; if (maxVal < val) maxVal = val; sumData += val; sumSquares += val * val; ++count; } } if (count == 0) errAbort("No numerical data column %d of %s", col, fileName); double average = sumData/count; double stdDev = calcStdFromSums(sumData, sumSquares, count); if (tableOut) { printf("# min max mean N sum stddev\n"); printf("%g %g %g %llu %g %g\n", minVal, maxVal, average, count, sumData, stdDev); } else { printf("average %f\n", average); printf("min %f\n", minVal); printf("max %f\n", maxVal); printf("count %llu\n", count); printf("total %f\n", sumData); printf("standard deviation %f\n", stdDev); } }
void newStitch3(char *axtFile, char *output) /* newStitch3 - Another stitching experiment - with kd-trees.. */ { struct hash *pairHash = newHash(0); /* Hash keyed by qSeq<strand>tSeq */ struct dyString *dy = newDyString(512); struct lineFile *lf = lineFileOpen(axtFile, TRUE); struct axt *axt; struct seqPair *spList = NULL, *sp; FILE *f = mustOpen(output, "w"); /* Read input file and divide alignments into various parts. */ while ((axt = axtRead(lf)) != NULL) { struct cBlock *block; if (axt->score < 500) { axtFree(&axt); continue; } dyStringClear(dy); dyStringPrintf(dy, "%s%c%s", axt->qName, axt->qStrand, axt->tName); sp = hashFindVal(pairHash, dy->string); if (sp == NULL) { AllocVar(sp); slAddHead(&spList, sp); hashAddSaveName(pairHash, dy->string, sp, &sp->name); } AllocVar(block); block->qStart = axt->qStart; block->qEnd = axt->qEnd; block->tStart = axt->tStart; block->tEnd = axt->tEnd; block->score = axt->score; slAddHead(&sp->blockList, block); axtFree(&axt); } for (sp = spList; sp != NULL; sp = sp->next) { slReverse(&sp->blockList); chainPair(sp, f); } dyStringFree(&dy); }
void addStretchInfo(char *fileName, struct chromInfo *ctg) /* Add info about how much clones are stretched from gl file. */ { struct hash *cloneHash = newHash(12); struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[4]; struct gl gl; struct clone *cloneList = NULL, *clone; while (lineFileRow(lf, row)) { glStaticLoad(row, &gl); chopSuffix(gl.frag); if ((clone = hashFindVal(cloneHash, gl.frag)) == NULL) { AllocVar(clone); slAddHead(&cloneList, clone); hashAddSaveName(cloneHash, gl.frag, clone, &clone->name); clone->start = gl.start; clone->end = gl.end; } else { if (gl.start < clone->start) clone->start = gl.start; if (gl.end > clone->end) clone->end = gl.end; } clone->totalSize += gl.end - gl.start; } for (clone = cloneList; clone != NULL; clone = clone->next) { int stretchSize = clone->end - clone->start; double stretchRatio; ctg->cloneCount += 1; ctg->totalStretch += stretchSize - clone->totalSize; stretchRatio = stretchSize / clone->totalSize; if (stretchRatio > 1.3) ctg->stretchedClones += 1; if (stretchRatio > 2.0) ctg->wayStretchedClones += 1; } lineFileClose(&lf); hashFree(&cloneHash); slFreeList(&cloneList); }