void blat(char *dbFile, char *queryFile, char *outName) /* blat - Standalone BLAT fast sequence search command line tool. */ { char **dbFiles, **queryFiles; int dbCount, queryCount; struct dnaSeq *dbSeqList, *seq; struct genoFind *gf; boolean tIsProt = (tType == gftProt); boolean qIsProt = (qType == gftProt); boolean bothSimpleNuc = (tType == gftDna && (qType == gftDna || qType == gftRna)); boolean bothSimpleProt = (tIsProt && qIsProt); FILE *f = mustOpen(outName, "w"); boolean showStatus = (f != stdout); databaseName = dbFile; gfClientFileArray(dbFile, &dbFiles, &dbCount); if (makeOoc != NULL) { gfMakeOoc(makeOoc, dbFiles, dbCount, tileSize, repMatch, tType); if (showStatus) printf("Done making %s\n", makeOoc); exit(0); } gfClientFileArray(queryFile, &queryFiles, &queryCount); dbSeqList = gfClientSeqList(dbCount, dbFiles, tIsProt, tType == gftDnaX, repeats, minRepDivergence, showStatus); databaseSeqCount = slCount(dbSeqList); for (seq = dbSeqList; seq != NULL; seq = seq->next) databaseLetters += seq->size; gvo = gfOutputAny(outputFormat, minIdentity*10, qIsProt, tIsProt, noHead, databaseName, databaseSeqCount, databaseLetters, minIdentity, f); if (bothSimpleNuc || bothSimpleProt) { struct hash *maskHash = NULL; /* Save away masking info for output. */ if (repeats != NULL) { maskHash = newHash(0); for (seq = dbSeqList; seq != NULL; seq = seq->next) { Bits *maskedBits = maskFromUpperCaseSeq(seq); hashAdd(maskHash, seq->name, maskedBits); } } /* Handle masking and indexing. If masking is off, we want the indexer * to see unmasked sequence, otherwise we want it to see masked. However * after indexing we always want it unmasked, because things are always * unmasked for the extension phase. */ if (mask == NULL && !bothSimpleProt) gfClientUnmask(dbSeqList); gf = gfIndexSeq(dbSeqList, minMatch, maxGap, tileSize, repMatch, ooc, tIsProt, oneOff, FALSE, stepSize); if (mask != NULL) gfClientUnmask(dbSeqList); searchOneIndex(queryCount, queryFiles, gf, outName, tIsProt, maskHash, f, showStatus); freeHash(&maskHash); } else if (tType == gftDnaX && qType == gftProt) { bigBlat(dbSeqList, queryCount, queryFiles, outName, FALSE, TRUE, f, showStatus); } else if (tType == gftDnaX && (qType == gftDnaX || qType == gftRnaX)) { bigBlat(dbSeqList, queryCount, queryFiles, outName, TRUE, qType == gftDnaX, f, showStatus); } else { errAbort("Unrecognized combination of target and query types\n"); } if (dotEvery > 0) printf("\n"); freeDnaSeqList(&dbSeqList); }
void gfClient(char *hostName, char *portName, char *tSeqDir, char *inName, char *outName, char *tTypeName, char *qTypeName) /* gfClient - A client for the genomic finding program that produces a .psl file. */ { struct lineFile *lf = lineFileOpen(inName, TRUE); static bioSeq seq; FILE *out = mustOpen(outName, "w"); enum gfType qType = gfTypeFromName(qTypeName); enum gfType tType = gfTypeFromName(tTypeName); int dotMod = 0; char databaseName[256]; struct hash *tFileCache = gfFileCacheNew(); snprintf(databaseName, sizeof(databaseName), "%s:%s", hostName, portName); gvo = gfOutputAny(outputFormat, round(minIdentity*10), qType == gftProt, tType == gftProt, optionExists("nohead"), databaseName, 23, 3.0e9, minIdentity, out); gfOutputHead(gvo, out); while (faSomeSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name, qType != gftProt)) { int conn = gfConnect(hostName, portName); if (dots != 0) { if (++dotMod >= dots) { dotMod = 0; fputc('.', stdout); fflush(stdout); } } if (qType == gftProt && (tType == gftDnaX || tType == gftRnaX)) { gvo->reportTargetStrand = TRUE; gfAlignTrans(&conn, tSeqDir, &seq, minScore, tFileCache, gvo); } else if ((qType == gftRnaX || qType == gftDnaX) && (tType == gftDnaX || tType == gftRnaX)) { gvo->reportTargetStrand = TRUE; gfAlignTransTrans(&conn, tSeqDir, &seq, FALSE, minScore, tFileCache, gvo, qType == gftRnaX); if (qType == gftDnaX) { reverseComplement(seq.dna, seq.size); close(conn); conn = gfConnect(hostName, portName); gfAlignTransTrans(&conn, tSeqDir, &seq, TRUE, minScore, tFileCache, gvo, FALSE); } } else if ((tType == gftDna || tType == gftRna) && (qType == gftDna || qType == gftRna)) { gfAlignStrand(&conn, tSeqDir, &seq, FALSE, minScore, tFileCache, gvo); conn = gfConnect(hostName, portName); reverseComplement(seq.dna, seq.size); gfAlignStrand(&conn, tSeqDir, &seq, TRUE, minScore, tFileCache, gvo); } else { errAbort("Comparisons between %s queries and %s databases not yet supported", qTypeName, tTypeName); } gfOutputQuery(gvo, out); } if (out != stdout) printf("Output is in %s\n", outName); gfFileCacheFree(&tFileCache); }