void makeConfigFromFileList(char *input, char *output) /* makeConfigFromFileList - Create config file for hgBedsToBedExps from list of files.. */ { FILE *f = mustOpen(output, "w"); struct slName *in, *inList = readAllLines(input); int commonPrefix = commonPrefixSize(inList); int commonSuffix = commonSuffixSize(inList); for (in = inList; in != NULL; in = in->next) { char *s = in->name; int len = strlen(s); char *midString = cloneStringZ(s+commonPrefix, len - commonPrefix - commonSuffix); char *factor, *cell; camelParseTwo(midString, &cell, &factor); fprintf(f, "%s\t%s\t", factor, cell); fprintf(f, "%s\t", cellAbbreviation(cell)); fprintf(f, "file\t%d\t", scoreCol-1); fprintf(f, "%g\t", calcNormScoreFactor(in->name, scoreCol-1)); fprintf(f, "%s\n", in->name); } carefulClose(&f); }
static void regionPairsList(char *regionsFile) { FILE *fp; char buf[500], str1[500], str2[500], orient[10]; int i; int od[4]; fp = mustOpen(regionsFile, "r"); while (fgets(buf, 500, fp)) { if (sscanf(buf, "%[^ ] %s %d %d %d %d", str1, str2, &(od[0]), &(od[1]), &(od[2]), &(od[3])) != 6) errAbort("error: %s", buf); for (i = 0; i < 4; i++) { if (od[i] >= MINPAIR) { mapOrient(i, orient); if (sameString(orient, "[- -]")) printf("%s\t%s\t[+ +]\t(%d)\n", str2, str1, od[i]); else printf("%s\t%s\t%s\t(%d)\n", str1, str2, orient, od[i]); } } } fclose(fp); }
void mafSplitPos(char *database, char *size, char *outFile) /* Pick best positions for split close to size. * Use middle of a gap as preferred site. * If not gaps are in range, use recent repeats (0% diverged) */ { int splitSize = 0; int chromSize = 0; struct hash *chromHash; struct hashCookie hc; struct hashEl *hel; struct sqlConnection *conn = sqlConnect(database); FILE *f; db = database; verbose(1, "Finding split positions for %s at ~%s Mbp intervals\n", database, size); splitSize = sqlSigned(size) * 1000000; if (chrom == NULL) { chromHash = hChromSizeHash(database); } else { chromHash = hashNew(6); hashAddInt(chromHash, chrom, hChromSize(database, chrom)); } conn = sqlConnect(database); f = mustOpen(outFile, "w"); hc = hashFirst(chromHash); while ((hel = hashNext(&hc)) != NULL) { chrom = hel->name; chromSize = ptToInt(hel->val); chromSplits(chrom, chromSize, splitSize, conn, f); } sqlDisconnect(&conn); carefulClose(&f); }
void writeRa(char *fileName)
/* Write our .ra file with information common to all NIBB images. */
{
/* Fixed lines that precede the itemUrl line, in output order. */
static const char *const leadingLines[] =
    {
    "submitSet nibbXenopusLaevis3\n",
    "fullDir ../visiGene/full/inSitu/XenopusLaevis/nibb\n",
    "thumbDir ../visiGene/200/inSitu/XenopusLaevis/nibb\n",
    "priority 1200\n",
    "sliceType whole mount\n",
    "submissionSource National Institute of Basic Biology (NIBB) XDB\n",
    "taxon 8355\n",
    "genotype wild type\n",
    "acknowledgement Thanks to Naoto Ueno and colleagues at NIBB for helping make these images available in VisiGene\n",
    /* Still need to fill in contributor, publication, journal, journalUrl, itemUrl */
    "contributor Ueno N., Kitayama A., Terasaka C., Nomoto K., Shibamoto K., Nishide H.\n",
    "year 2005\n",
    "setUrl http://xenopus.nibb.ac.jp\n",
    };
size_t lineCount = sizeof(leadingLines) / sizeof(leadingLines[0]);
size_t ix;
FILE *out = mustOpen(fileName, "w");

for (ix = 0; ix < lineCount; ++ix)
    fputs(leadingLines[ix], out);

/* The doubled percent sign makes the emitted line contain a literal "%s". */
fprintf(out, "itemUrl http://xenopus.nibb.ac.jp/cgi-bin/search?query=%%s&name=clone\n");
fputs("probeColor purple\n", out);
carefulClose(&out);
}
int main(int argc, char *argv[]) /* hash snpFasta, read through chrN_snpTmp, rewrite with extensions to individual chrom tables */ { struct slName *chromList, *chromPtr; char tableName[64]; if (argc != 2) usage(); snpDb = argv[1]; hSetDb(snpDb); chromList = hAllChromNamesDb(snpDb); errorFileHandle = mustOpen("snpMoltype.errors", "w"); multiFastaHash = readFasta("chrMulti"); for (chromPtr = chromList; chromPtr != NULL; chromPtr = chromPtr->next) { safef(tableName, ArraySize(tableName), "%s_snpTmp", chromPtr->name); if (!hTableExists(tableName)) continue; verbose(1, "chrom = %s\n", chromPtr->name); chromFastaHash = readFasta(chromPtr->name); processSnps(chromPtr->name); } carefulClose(&errorFileHandle); for (chromPtr = chromList; chromPtr != NULL; chromPtr = chromPtr->next) { safef(tableName, ArraySize(tableName), "%s_snpTmp", chromPtr->name); if (!hTableExists(tableName)) continue; recreateDatabaseTable(chromPtr->name); verbose(1, "loading chrom = %s\n", chromPtr->name); loadDatabase(chromPtr->name); } return 0; }
void sangPairs(char *sangDir, char *outFile)
/* sangPairs - Process Sanger Paired reads to remove low quality bases and put in one big file.
 * Walks two directory levels below sangDir and runs filterByQual() on every
 * "*.fasta" file found, appending to outFile.  The name of each top-level
 * directory and one dot per second-level directory are printed as progress,
 * followed by the sum of the values filterByQual() returned. */
{
struct hash *hash = newHash(20);
struct fileInfo *dirList, *dirEl;
struct fileInfo *subList, *subEl;
struct fileInfo *faList, *faEl;
FILE *f = mustOpen(outFile, "w");
unsigned long totalSize = 0;

dirList = listDirX(sangDir, "*", TRUE);
for (dirEl = dirList; dirEl != NULL; dirEl = dirEl->next)
    {
    if (!dirEl->isDir)
        continue;
    printf("%s", dirEl->name);
    fflush(stdout);
    subList = listDirX(dirEl->name, "*", TRUE);
    for (subEl = subList; subEl != NULL; subEl = subEl->next)
        {
        if (!subEl->isDir)
            continue;
        printf(".");
        fflush(stdout);
        faList = listDirX(subEl->name, "*.fasta", TRUE);
        for (faEl = faList; faEl != NULL; faEl = faEl->next)
            totalSize += filterByQual(faEl->name, f, 19, 15, hash);
        slFreeList(&faList);
        }
    printf("\n");
    slFreeList(&subList);
    }
/* Release the top-level listing the same way as the nested ones. */
slFreeList(&dirList);
printf("Total size %lu bytes\n", totalSize);
/* carefulClose rather than fclose so a failed flush of the output is reported. */
carefulClose(&f);
}
void encode2Md5UpdateManifest(char *md5File, char *rootDir, char *oldManifest, char *newManifest) /* encode2Md5UpdateManifest - Update md5sum, size, validation key in an encode2 * manifest.tab file. */ { struct encode2Manifest *mi, *miList = encode2ManifestLoadAll(oldManifest); struct hash *md5Hash = md5FileHash(md5File); verbose(2, "Got %d items in miList, %d in md5Hash\n", slCount(miList), md5Hash->elCount); FILE *f = mustOpen(newManifest, "w"); int updateCount = 0; for (mi = miList; mi != NULL; mi = mi->next) { char *newMd5 = hashFindVal(md5Hash, mi->fileName); if (newMd5 != NULL) { ++updateCount; updateSumAndAll(mi, newMd5, rootDir); } encode2ManifestTabOut(mi, f); } verbose(1, "Found %d of %d in patch.\n", updateCount, md5Hash->elCount); carefulClose(&f); }
void outputGcStatsWiggle(struct dnaSeq *seqList, struct hash *noGapHash, unsigned int windowLength, char *outFilename)
/* Write a fixedStep wiggle file of reportGcCount() values to outFilename.
 *   seqList      - sequences to scan
 *   noGapHash    - maps a sequence name to the list of bed regions to report
 *   windowLength - number of bases in each window
 * A "fixedStep" header is written for every region, followed by one value for
 * each window start whose window lies entirely inside the region. */
{
struct dnaSeq *currSeq = NULL;
struct bed *currRegion = NULL;
unsigned int i = 0;
char *currWindow = NULL;
FILE *fout = mustOpen(outFilename, "w");

for (currSeq = seqList; currSeq != NULL; currSeq = currSeq->next)
    {
    for (currRegion = hashFindVal(noGapHash, currSeq->name);
         currRegion != NULL; currRegion = currRegion->next)
        {
        fprintf(fout, "fixedStep chrom=%s start=%u step=1\n",
                currRegion->chrom, currRegion->chromStart+1);
        /* Test i + windowLength <= chromEnd rather than
         * i <= chromEnd - windowLength: the subtraction is unsigned and wraps
         * to a huge value when the region ends before windowLength, which made
         * the loop run far past the end of the sequence. */
        for (i = currRegion->chromStart; i + windowLength <= currRegion->chromEnd; i++)
            {
            currWindow = &(currSeq->dna[i]);
            fprintf(fout, "%u\n", reportGcCount(currWindow, windowLength));
            }
        }
    }
carefulClose(&fout);
}
boolean flyCdnaSeq(char *name, struct dnaSeq **retDna, struct wormCdnaInfo *retInfo) /* Get a single fly cDNA sequence. Optionally (if retInfo is non-null) get additional * info about the sequence. */ { long offset; char *faComment; char **pFaComment = (retInfo == NULL ? NULL : &faComment); static struct snof *cdnaSnof = NULL; static FILE *cdnaFa; if (cdnaSnof == NULL) cdnaSnof = snofMustOpen("c:/biodata/fly/cDna/allcdna"); if (cdnaFa == NULL) cdnaFa = mustOpen("c:/biodata/fly/cDna/allcdna.fa", "rb"); if (!snofFindOffset(cdnaSnof, name, &offset)) return FALSE; fseek(cdnaFa, offset, SEEK_SET); if (!faReadNext(cdnaFa, name, TRUE, pFaComment, retDna)) return FALSE; flyFaCommentIntoInfo(faComment, retInfo); return TRUE; }
void hgNearTest(char *url, char *log) /* hgNearTest - Test hgNear web page. */ { struct htmlPage *rootPage = htmlPageGet(url); struct htmlForm *mainForm; struct htmlFormVar *orgVar; FILE *f = mustOpen(log, "w"); htmlPageValidateOrAbort(rootPage); htmlPageSetVar(rootPage, NULL, orderVarName, "geneDistance"); htmlPageSetVar(rootPage, NULL, countVarName, "25"); if ((mainForm = htmlFormGet(rootPage, "mainForm")) == NULL) errAbort("Couldn't get main form"); if ((orgVar = htmlFormVarGet(mainForm, "org")) == NULL) errAbort("Couldn't get org var"); if (clOrg != NULL) testOrg(rootPage, mainForm, clOrg, clDb); else { struct slName *org; for (org = orgVar->values; org != NULL; org = org->next) { testOrg(rootPage, mainForm, org->name, clDb); } } htmlPageFree(&rootPage); slReverse(&nearTestList); reportSummary(nearTestList, stdout); fprintf(f,"seed=%d\n",seed); reportAll(nearTestList, f); fprintf(f, "---------------------------------------------\n"); reportSummary(nearTestList, f); slFreeList(&nearTestList); carefulClose(&f); }
void doTrimHeader(char *inputFileName)
/* Copy inputFileName to "trimHeader.out".  Lines that start with '>' are cut
 * at the first '.', so only the part before it is written; every other line
 * is copied unchanged. */
{
FILE *outputFileHandle = mustOpen("trimHeader.out", "w");
struct lineFile *lf = lineFileOpen(inputFileName, TRUE);
char *line;
int lineSize;
char *contigId[2];

while (lineFileNext(lf, &line, &lineSize))
    {
    if (line[0] != '>')
        {
        fprintf(outputFileHandle, "%s\n", line);
        continue;
        }
    /* Pass the size of the array actually being filled.  The size of an
     * unrelated 5-element array was passed before, so a header with more
     * than two '.'-separated parts wrote past the end of contigId. */
    chopString(line, ".", contigId, ArraySize(contigId));
    fprintf(outputFileHandle, "%s\n", contigId[0]);
    }

carefulClose(&outputFileHandle);
lineFileClose(&lf);
}
static void mkH1n1StructData(char *gene, char *idPairFile, char *highlightId, struct tempName *imageFile, struct tempName *chimeraScript)
/* Generate 3D structure files.  Difference highlighting is generated when
 * either idPairFile or highlightId is given; if both are given, idPairFile is
 * the one used.  On return imageFile and chimeraScript name the
 * "_highlight.jpg" and "_highlight.cmd" outputs relative to the temp prefix.
 * NOTE(review): gene and idPairFile are pasted unquoted into shell commands
 * run with system(); confirm callers only pass trusted values. */
{
char idFile[PATH_LEN], idArg[PATH_LEN], logFile[PATH_LEN], cmd[2*PATH_LEN];
struct tempName prefix;

trashDirFile(&prefix, "hgct", gene, "tmp");
idArg[0] = '\0';

if (idPairFile != NULL)
    {
    /* The first column of the pair file becomes the id list. */
    safef(idFile, sizeof(idFile), "%s.ids", prefix.forCgi);
    safef(cmd, sizeof(cmd), "cut -f 1 %s >%s", idPairFile, idFile);
    if (system(cmd) != 0)
        errAbort("extracting protein ids failed: %s", cmd);
    safef(idArg, sizeof(idArg), "--ids %s", idFile);
    }
else if (highlightId != NULL)
    {
    /* A single id: write a one-line id list. */
    safef(idFile, sizeof(idFile), "%s.ids", prefix.forCgi);
    FILE *idOut = mustOpen(idFile, "w");
    fprintf(idOut, "%s\n", highlightId);
    carefulClose(&idOut);
    safef(idArg, sizeof(idArg), "--ids %s", idFile);
    }

/* dynamic_highlight.pl knows the locations of the model files. */
safef(logFile, sizeof(logFile), "%s.log", prefix.forCgi);
safef(cmd, sizeof(cmd), "%s/dynamic_highlight.pl --rasmol --chimera --protein %s --consensus 0602 %s --base %s >%s 2>&1",
      getH1n1StructDir(), gene, idArg, prefix.forCgi, logFile);
if (system(cmd) != 0)
    errAbort("creation of 3D structure highlight files failed: %s", cmd);

/* Output names are all predefined by the script relative to prefix. */
tempNameFromPrefix(imageFile, &prefix, "_highlight.jpg");
tempNameFromPrefix(chimeraScript, &prefix, "_highlight.cmd");
}
static void checkExtRecord(struct seqFields *seq, char *extPath) /* Check the external file record for a sequence (slow). Assumes * that bounds have been sanity check for a file. */ { /* read range into buffer */ FILE *fh = mustOpen(extPath, "r"); char *faBuf; char accVer[GB_ACC_BUFSZ]; struct dnaSeq *dnaSeq; if (fseeko(fh, seq->file_offset, SEEK_SET) < 0) { gbError("%s: can't seek %s", seq->acc, extPath); carefulClose(&fh); } faBuf = needMem(seq->file_size+1); mustRead(fh, faBuf, seq->file_size); faBuf[seq->file_size] = '\0'; carefulClose(&fh); /* verify contents */ if (faBuf[0] != '>') { gbError("%s: gbExtFile offset %lld doesn't start a fasta record: %s", seq->acc, (long long)seq->file_offset, extPath); free(faBuf); return; } dnaSeq = faFromMemText(faBuf); safef(accVer, sizeof(accVer), "%s.%d", seq->acc, seq->version); if (!sameString(dnaSeq->name, accVer)) gbError("%s: name in fasta header \"%s\" doesn't match expected \"%s\": %s", seq->acc, dnaSeq->name, accVer, extPath); if (dnaSeq->size != seq->size) gbError("%s: size of fasta sequence (%d) doesn't match expected (%d): %s", seq->acc, dnaSeq->size, seq->size, extPath); freeDnaSeq(&dnaSeq); }
void makeSineSineFixed(char *fileName, int innerRes, int outerRes, int outerCount, int chromCount)
/* Make a test set involving sine modulated sine waves in fixedStep format.
 * For each of chr1..chr<chromCount> a fixedStep header is written followed by
 * innerRes*outerRes*outerCount values of 100*sin(inner)*sin(outer), where the
 * inner angle advances by TWOPI/innerRes per step and the outer angle by
 * TWOPI/(innerRes*outerRes). */
{
FILE *out = mustOpen(fileName, "w");
int stepCount = innerRes * outerRes * outerCount;
double innerDelta = TWOPI/innerRes;
double outerDelta = TWOPI/(innerRes*outerRes);
int chromNum = 1;

while (chromNum <= chromCount)
    {
    double innerPhase = 0, outerPhase = 0;
    int remaining;

    fprintf(out, "fixedStep chrom=chr%d start=1 step=1 span=1\n", chromNum);
    /* Angles are accumulated step by step and restart at zero per chromosome. */
    for (remaining = stepCount; remaining > 0; --remaining)
        {
        fprintf(out, "%f\n", 100.0*sin(innerPhase)*sin(outerPhase));
        innerPhase += innerDelta;
        outerPhase += outerDelta;
        }
    ++chromNum;
    }
carefulClose(&out);
}
void affyTransLiftedToSample(int grouping, char *affyTransIn) /* Top level function to run combine pairs and offset files to give sample. */ { struct affyTransLifted *atl = NULL, *atlList = NULL; struct sample *sampList = NULL, *samp = NULL; struct sample *groupedList = NULL; char *fileRoot = NULL; char buff[10+strlen(affyTransIn)]; FILE *out = NULL; char *fileNameCopy = cloneString(affyTransIn); chopSuffix(fileNameCopy); fprintf(stderr, "."); fflush(stderr); atlList = affyTransLiftedLoadAll(affyTransIn); //warn("Creating samples."); for(atl = atlList; atl != NULL; atl = atl->next) { samp = sampleFromAffyTransLifted(atl, fileNameCopy); if(samp != NULL) slAddHead(&sampList, samp); } //warn("Sorting Samples"); slSort(&sampList, sampleCoordCmp); groupedList = groupByPosition(grouping, sampList); //warn("Saving Samples."); snprintf(buff, sizeof(buff), "%s.sample", affyTransIn); out = mustOpen(buff, "w"); for(samp = groupedList; samp != NULL; samp = samp->next) { sampleTabOut(samp, out); } //warn("Cleaning up."); freez(&fileNameCopy); carefulClose(&out); sampleFreeList(&sampList); sampleFreeList(&groupedList); affyTransLiftedFreeList(&atlList); }
static void createReadsHash(char **argv) { int i; FILE *fp; char buf[500], fub[500], dna[500], qua[500], id[50]; char *str; readsHash = newHash(16); for (i = 2; i <= 3; i++) { fp = mustOpen(argv[i], "r"); for (;;) { if (fgets(buf, 500, fp)) { if(strlen(buf) == 0){break;} if (ncbi) { sscanf(buf, "@%s %*s", id); sprintf(fub, "%s/%d", id, i - 1); } else { sscanf(buf, "@%s %*s", id); strcpy(fub, id); } if (!fgets(buf, 500, fp)){ break; errAbort("error: %s", argv[i]); } sscanf(buf, "%s", dna); if (!fgets(buf, 500, fp)) errAbort("error: %s", argv[i]); if (!fgets(buf, 500, fp)) errAbort("error: %s", argv[i]); sscanf(buf, "%s", qua); sprintf(buf, "%s %s", dna, qua); str = cloneString(buf); hashAdd(readsHash, fub, str); } else break; } fclose(fp); } }
void eisenInput(char *database, char *outFile) /* eisenInput - Create input for Eisen-style cluster program. */ { struct slName *chromList = NULL, *chromEl; FILE *f = mustOpen(outFile, "w"); char *chrom; struct hash *refLinkHash = hashNew(0); struct refLink *refLinkList; struct hash *erHash = hashNew(0); struct expRecord *erList = NULL, *er; /* Load info good for all chromosomes. */ refLinkList = loadRefLink(database, refLinkHash); erList = loadExpRecord(expRecordTable, "hgFixed"); for (er = erList; er != NULL; er = er->next) { char sid[16]; snprintf(sid, sizeof(sid), "%u", er->id); hashAdd(erHash, sid, er); } /* Do it chromosome by chromosome. */ chromList = hAllChromNames(database); for (chromEl = chromList; chromEl != NULL; chromEl = chromEl->next) { chrom = chromEl->name; uglyf("%s\n", chrom); oneChromInput(database, chrom, hChromSize(database, chrom), "rnaCluster", expTrack, refLinkHash, erHash, f); } /* Cleanup time! */ expRecordFreeList(&erList); freeHash(&erHash); refLinkFreeList(&refLinkList); freeHash(&refLinkHash); }
int main(int argc, char *argv[])
/* c2gcheck - compare two gene-to-cdna files, note differences, and write out
 * a third merged file.  Expects four arguments: the Sanger file, Jim's file,
 * the merged output file and the error log. */
{
struct g2cFile *sangerGenes, *jimGenes;
char *sangerName, *jimName, *updateName, *errName;

if (argc != 5)
    {
    errAbort("c2gcheck - compares two gene-to-cdna files, notes differences\n"
             "and writes out a third merged file.\n"
             "Usage:\n"
             " c2gcheck Sanger Jim Update errs\n");
    }

memPool = lmInit(1<<16);
pushWarnHandler(reportWarning);

sangerName = argv[1];
jimName    = argv[2];
updateName = argv[3];
errName    = argv[4];

errFile = mustOpen(errName, "w");

/* Load both inputs, then check each on its own. */
sangerGenes = loadG2cFile(sangerName);
jimGenes = loadG2cFile(jimName);
checkOneFile(sangerGenes, sangerName);
checkOneFile(jimGenes, jimName);

/* Cross-check in both directions. */
checkTwoFiles(sangerGenes, jimGenes, "Jim unique");
checkTwoFiles(jimGenes, sangerGenes, "Sanger unique");

/* Merge into the Sanger list and save it. */
update(sangerGenes, jimGenes);
saveG2cFile(sangerGenes, updateName);

lmCleanup(&memPool);
return 0;
}
void mafToProtein(char *dbName, char *mafTable, char *frameTable, char *org, char *speciesList, char *outName) /* mafToProtein - output protein alignments using maf and frames. */ { struct slName *geneNames = NULL; struct slName *speciesNames = readList(speciesList); FILE *f = mustOpen(outName, "w"); hSetDb(dbName); newTableType = hHasField(frameTable, "isExonStart"); if (inExons && !newTableType) errAbort("must have new mafFrames type to output in exons"); if (geneList != NULL) geneNames = readList(geneList); else if (geneName != NULL) { int len = strlen(geneName); geneNames = needMem(sizeof(*geneNames)+len); strcpy(geneNames->name, geneName); } else geneNames = queryNames(dbName, frameTable, org); for(; geneNames; geneNames = geneNames->next) { verbose(2, "outting gene %s \n",geneNames->name); outGene(f, geneNames->name, dbName, mafTable, frameTable, org, speciesNames); if (delay) { verbose(2, "delaying %d seconds\n",delay); sleep(delay); } } }
void twoBitMask(char *inName, char *maskName, char *outName) /* twoBitMask - apply masking to a .2bit file, creating a new .2bit file. */ { struct hash *tbHash = hashNew(20); struct hash *bitmapHash = hashNew(20); struct twoBit *twoBitList = NULL; struct twoBit *twoBit = NULL; FILE *f = NULL; if (! twoBitIsFile(inName)) { if (twoBitIsSpec(inName)) errAbort("Sorry, this works only on whole .2bit files, not specs."); else errAbort("Input %s does not look like a proper .2bit file.", inName); } twoBitList = slurpInput(inName, tbHash, bitmapHash); /* Read mask data into bitmapHash, store it in twoBits: */ if ((type && endsWith(type, "bed")) || endsWith(maskName, ".bed")) maskWithBed(maskName, tbHash, bitmapHash); else if ((type && endsWith(type, "out")) || endsWith(maskName, ".out")) maskWithOut(maskName, tbHash, bitmapHash); else errAbort("Sorry, maskFile must end in \".bed\" or \".out\"."); /* Create a new .2bit file, write it out from twoBits. */ f = mustOpen(outName, "wb"); twoBitWriteHeader(twoBitList, f); for (twoBit = twoBitList; twoBit != NULL; twoBit = twoBit->next) { twoBitWriteOne(twoBit, f); } carefulClose(&f); /* Don't bother freeing twoBitList and hashes here -- just exit. */ }
void countCosmids(char *listFileName, FILE *out) /* Read each cosmid in list file and find out how big it is. */ { FILE *listFile = mustOpen(listFileName, "r"); char line[512], *s; int lineCount; struct dnaSeq *seq; char path[512]; while (fgets(line, sizeof(line), listFile)) { ++lineCount; s = trimSpaces(line); sprintf(path, "%s/%s", "C:/biodata/cbriggsae/finish", s); seq = faReadDna(path); ++cosmidCount; cosmidTotalSize += seq->size; freeDnaSeq(&seq); } fclose(listFile); cosmidAverageSize = round((double)cosmidTotalSize/cosmidCount); fprintf(out, "%d cosmids, average length %d\n", cosmidCount, cosmidAverageSize); }
void readAllWordsOrFa(char *fileName, char ***retFiles, int *retFileCount, char **retBuf)
/* Open a file and check if it is .fa, judged by whether its first character
 * is '>'.  If so return just that file name in a list of one.  Otherwise read
 * the whole file with readAllWords() and treat it as a list of file names. */
{
FILE *probe = mustOpen(fileName, "r");
int firstChar = fgetc(probe);
fclose(probe);

if (firstChar != '>')
    {
    readAllWords(fileName, retFiles, retFileCount, retBuf);
    return;
    }

/* A fasta file: the list is just a copy of its own name. */
char **files;
*retFiles = AllocArray(files, 1);
*retBuf = files[0] = cloneString(fileName);
*retFileCount = 1;
}
void correctEst(char *oldFa, char *pslFile, char *nibDir, char *outFa)
/* correctEst - Correct ESTs by passing them through genome.
 * Every sequence of oldFa is written to outFa: through correctOne() when
 * pslFile holds an alignment under its name, unchanged otherwise. */
{
struct hash *pslHash = hashPsls(pslFile);
struct lineFile *lf = lineFileOpen(oldFa, FALSE);
FILE *f = mustOpen(outFa, "w");
static struct dnaSeq est;
struct psl *psl;
struct hash *nibHash = newHash(8);

while (faSpeedReadNext(lf, &est.dna, &est.size, &est.name))
    {
    if ((psl = hashFindVal(pslHash, est.name)) != NULL)
        {
        correctOne(&est, psl, nibDir, nibHash, f);
        }
    else
        {
        faWriteNext(f, est.name, est.dna, est.size);
        }
    }
/* Neither file was closed before; closing the output reports a failed flush. */
carefulClose(&f);
lineFileClose(&lf);
}
void hgKgGetText(char *database, char *outFile) /* hgKgGetText - Get text from known genes into a file. */ { FILE *f = mustOpen(outFile, "w"); struct sqlConnection *conn = sqlConnect(database); struct sqlConnection *spConn = sqlConnect("uniProt"); struct sqlConnection *goConn = sqlConnect("go"); struct kgXref *kgList = NULL, *kg; struct hash *refSeqHash = NULL; /* Return hash keyed by refSeq NM_ id, with description values. */ gotRefSeqSummary = sqlTableExists(conn, summaryTable); if (gotRefSeqSummary) refSeqHash = getRefSeqSummary(conn); else warn("No %s table in %s, proceeding without...", summaryTable, database); kgList = getKgList(conn); verbose(1, "Read in %d known genes from %s\n", slCount(kgList), database); for (kg = kgList; kg != NULL; kg = kg->next) getText(kg, refSeqHash, conn, spConn, goConn, f); carefulClose(&f); }
boolean findLineInFile(char *fileName, char *start, char *lineBuf, int lineBufSize)
/* Scan the named file line by line for the first line that begins with start
 * immediately followed by a space.  Returns TRUE with that line in lineBuf
 * when found.  Returns FALSE otherwise, in which case lineBuf holds whatever
 * the last read left in it. */
{
int prefixLen = strlen(start);
boolean found = FALSE;
FILE *in = mustOpen(fileName, "r");

while (!found && fgets(lineBuf, lineBufSize, in) != NULL)
    {
    /* Compare the prefix first; only then is lineBuf[prefixLen] known to be
     * inside the string that was just read. */
    if (strncmp(start, lineBuf, prefixLen) != 0)
        continue;
    if (lineBuf[prefixLen] == ' ')
        found = TRUE;
    }
fclose(in);
return found;
}
void seqFromPsl(char *inPsl, char *inTwoBit, char *outFa) /* seqFromPsl - Extract masked sequence from database corresponding to psl file. */ { struct twoBitFile *tbf = twoBitOpen(inTwoBit); struct lineFile *lf = pslFileOpen(inPsl); FILE *f = mustOpen(outFa, "w"); struct psl *psl; while ((psl = pslNext(lf)) != NULL) { char faHead[512]; struct dnaSeq *seq = twoBitReadSeqFrag(tbf, psl->tName, psl->tStart, psl->tEnd); if (psl->strand[0] == '-') reverseComplement(seq->dna, seq->size); safef(faHead, sizeof(faHead), "%s (%s:%d-%d)", psl->qName, psl->tName, psl->tStart+1, psl->tEnd); if (hardMask) lowerToN(seq->dna, seq->size); faWriteNext(f, faHead, seq->dna, seq->size); } carefulClose(&f); }
void blatFlekFilter(char *outName, int inCount, char *inNames[])
/* blatFilter - filter blat alignments somewhat.
 * Every alignment from each of the inCount input psl files is written to
 * outName, either whole with pslTabOut() or through writePslFrags(),
 * depending on how the target span compares with the query coordinates. */
{
int i;
FILE *f = mustOpen(outName, "w");

for (i=0; i<inCount; ++i)
    {
    char *inName = inNames[i];
    struct lineFile *lf = pslFileOpen(inName);
    struct psl *psl;
    while ((psl = pslNext(lf)) != NULL)
        {
        dotOut();
        /* NOTE(review): the right-hand side adds qEnd and qStart; a query
         * span would be qEnd - qStart.  Left unchanged - confirm intent. */
        if (psl->tEnd - psl->tStart < (psl->qEnd + psl->qStart) * 3)
            pslTabOut(psl, f);
        else
            writePslFrags(psl, f);
        pslFree(&psl);
        }
    /* Close each input when done so open files do not accumulate. */
    lineFileClose(&lf);
    }
printf("\n");
/* The output was never closed before; closing it reports a failed flush. */
carefulClose(&f);
}
void readFile(char *pslFile)
/* Implements the readFile task: read at most gMaxRows alignments from pslFile
 * (opened with gChrom), copying each to gOutput when that is set, then pass
 * the number of rows read to checkNumRows(). */
{
struct pslReader *reader = pslReaderFile(pslFile, gChrom);
FILE *out = NULL;
int rowCount;

if (gOutput != NULL)
    out = mustOpen(gOutput, "w");

for (rowCount = 0; rowCount < gMaxRows; rowCount++)
    {
    struct psl *rec = pslReaderNext(reader);
    if (rec == NULL)
        break;
    if (out != NULL)
        pslTabOut(rec, out);
    pslFree(&rec);
    }

carefulClose(&out);
pslReaderFree(&reader);
checkNumRows(pslFile, rowCount);
}
void loadChroms()
/* Hash chromNames, create file handles.  For every chrom in chromInfo a file
 * "<chrom>_snp125hg17ortho.tab" is opened for writing and its handle stored
 * in the file-level chromHash under the chrom name. */
{
char query[512];
char fileName[64];
char **row;
struct sqlConnection *conn = hAllocConn();
struct sqlResult *sr;

chromHash = newHash(0);
sqlSafef(query, sizeof(query), "select chrom from chromInfo");
sr = sqlGetResult(conn, query);
for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
    {
    char *chromName = row[0];
    FILE *tabFile;

    safef(fileName, sizeof(fileName), "%s_snp125hg17ortho.tab", chromName);
    tabFile = mustOpen(fileName, "w");
    verbose(1, "chrom = %s\n", chromName);
    hashAdd(chromHash, cloneString(chromName), tabFile);
    }
sqlFreeResult(&sr);
hFreeConn(&conn);
}
void makeMixem(char *fileName)
/* Make a file that mixes up various types: a variableStep section, a
 * fixedStep section, four four-column tab-separated lines, and a second
 * fixedStep section. */
{
static const char *const lines[] =
    {
    "variableStep chrom=chr1\n",
    "100\t1.0\n",
    "200\t2.0\n",
    "fixedStep chrom=chr1 start=1000 step=2\n",
    "1.0\n",
    "2.0\n",
    "3.0\n",
    "4.0\n",
    "chr1\t10000\t10100\t100\n",
    "chr1\t20000\t20100\t200\n",
    "chr2\t10000\t10100\t100\n",
    "chr3\t10000\t10100\t100\n",
    "fixedStep chrom=chr11 start=1000 step=2\n",
    "11.0\n",
    "12.0\n",
    "13.0\n",
    "14.0\n",
    };
size_t lineCount = sizeof(lines) / sizeof(lines[0]);
size_t ix;
FILE *out = mustOpen(fileName, "w");

/* None of the lines contains a conversion, so fputs emits them verbatim. */
for (ix = 0; ix < lineCount; ++ix)
    fputs(lines[ix], out);
carefulClose(&out);
}