void blat(char *dbFile, char *queryFile, char *outName)
/* blat - Standalone BLAT fast sequence search command line tool.
 *   dbFile    - database spec, expanded to a file array by gfClientFileArray
 *   queryFile - query spec, expanded the same way
 *   outName   - output file, opened for writing with mustOpen
 * If makeOoc is set the ooc file is built and the program exits without
 * searching.  Calls errAbort for an unsupported target/query type
 * combination or if the output file cannot be closed cleanly. */
{
char **dbFiles, **queryFiles;
int dbCount, queryCount;
struct dnaSeq *dbSeqList, *seq;
struct genoFind *gf;
boolean tIsProt = (tType == gftProt);
boolean qIsProt = (qType == gftProt);
boolean bothSimpleNuc = (tType == gftDna && (qType == gftDna || qType == gftRna));
boolean bothSimpleProt = (tIsProt && qIsProt);
FILE *f = mustOpen(outName, "w");
/* Status messages go to stdout, so suppress them when results go there too. */
boolean showStatus = (f != stdout);

databaseName = dbFile;
gfClientFileArray(dbFile, &dbFiles, &dbCount);
if (makeOoc != NULL)
    {
    /* Only build the ooc file; no search is done in this mode. */
    gfMakeOoc(makeOoc, dbFiles, dbCount, tileSize, repMatch, tType);
    if (showStatus)
	printf("Done making %s\n", makeOoc);
    exit(0);
    }
gfClientFileArray(queryFile, &queryFiles, &queryCount);
dbSeqList = gfClientSeqList(dbCount, dbFiles, tIsProt, tType == gftDnaX, repeats, 
	minRepDivergence, showStatus);

/* Record database statistics for the output writer.  databaseLetters is
 * accumulated with +=, so it is presumably zero on entry - confirm. */
databaseSeqCount = slCount(dbSeqList);
for (seq = dbSeqList; seq != NULL; seq = seq->next)
    databaseLetters += seq->size;

gvo = gfOutputAny(outputFormat, minIdentity*10, qIsProt, tIsProt, noHead, 
	databaseName, databaseSeqCount, databaseLetters, minIdentity, f);

if (bothSimpleNuc || bothSimpleProt)
    {
    struct hash *maskHash = NULL;

    /* Save away masking info for output. */
    if (repeats != NULL)
	{
	maskHash = newHash(0);
	for (seq = dbSeqList; seq != NULL; seq = seq->next)
	    {
	    Bits *maskedBits = maskFromUpperCaseSeq(seq);
	    hashAdd(maskHash, seq->name, maskedBits);
	    }
	}

    /* Handle masking and indexing.  If masking is off, we want the indexer
     * to see unmasked sequence, otherwise we want it to see masked.  However
     * after indexing we always want it unmasked, because things are always
     * unmasked for the extension phase. */
    if (mask == NULL && !bothSimpleProt)
        gfClientUnmask(dbSeqList);
    gf = gfIndexSeq(dbSeqList, minMatch, maxGap, tileSize, repMatch, ooc, 
    	tIsProt, oneOff, FALSE, stepSize);
    if (mask != NULL)
        gfClientUnmask(dbSeqList);

    searchOneIndex(queryCount, queryFiles, gf, outName, tIsProt, maskHash, f, showStatus);
    freeHash(&maskHash);
    }
else if (tType == gftDnaX && qType == gftProt)
    {
    /* Protein query against translated DNA target. */
    bigBlat(dbSeqList, queryCount, queryFiles, outName, FALSE, TRUE, f, showStatus);
    }
else if (tType == gftDnaX && (qType == gftDnaX || qType == gftRnaX))
    {
    /* Translated query against translated DNA target. */
    bigBlat(dbSeqList, queryCount, queryFiles, outName, TRUE, qType == gftDnaX, f, showStatus);
    }
else
    {
    errAbort("Unrecognized combination of target and query types\n");
    }
if (dotEvery > 0)
    printf("\n");

/* Close the output explicitly so that a failed flush (for instance a full
 * disk) is reported rather than silently lost at exit.  stdout is left open.
 * NOTE(review): gvo was created with f, so it must not be used for output
 * after this point - confirm no caller does so. */
if (f != stdout && fclose(f) != 0)
    errAbort("Error closing output file %s", outName);
freeDnaSeqList(&dbSeqList);
}
/* Example #2 */
/* 0 */
void gfClient(char *hostName, char *portName, char *tSeqDir, char *inName, 
	char *outName, char *tTypeName, char *qTypeName)
/* gfClient - A client for the genomic finding program that produces a .psl file.
 *   hostName, portName - server to connect to (one connection per alignment pass)
 *   tSeqDir            - passed through to the alignment routines
 *   inName             - query sequence file, read one record at a time
 *   outName            - output file, opened for writing with mustOpen
 *   tTypeName/qTypeName - target and query type names, parsed by gfTypeFromName
 * Calls errAbort for an unsupported query/target type combination or if the
 * output file cannot be closed cleanly.
 * NOTE(review): lf is opened here but never closed in this function. */
{
struct lineFile *lf = lineFileOpen(inName, TRUE);
static bioSeq seq;
FILE *out = mustOpen(outName, "w");
enum gfType qType = gfTypeFromName(qTypeName);
enum gfType tType = gfTypeFromName(tTypeName);
int dotMod = 0;
char databaseName[256];
struct hash *tFileCache = gfFileCacheNew();

snprintf(databaseName, sizeof(databaseName), "%s:%s", hostName, portName);

/* NOTE(review): 23 and 3.0e9 are hard-coded where a database sequence count
 * and total size would presumably go - confirm against gfOutputAny. */
gvo = gfOutputAny(outputFormat,  round(minIdentity*10), qType == gftProt, tType == gftProt,
	optionExists("nohead"), databaseName, 23, 3.0e9, minIdentity, out);
gfOutputHead(gvo, out);
while (faSomeSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name, qType != gftProt))
    {
    int conn = gfConnect(hostName, portName);

    /* Progress indicator: one dot on stdout every 'dots' sequences. */
    if (dots != 0)
        {
	if (++dotMod >= dots)
	    {
	    dotMod = 0;
	    fputc('.', stdout);
	    fflush(stdout);
	    }
	}
    if (qType == gftProt && (tType == gftDnaX || tType == gftRnaX))
        {
	gvo->reportTargetStrand = TRUE;
	gfAlignTrans(&conn, tSeqDir, &seq, minScore, tFileCache, gvo);
	}
    else if ((qType == gftRnaX || qType == gftDnaX) && (tType == gftDnaX || tType == gftRnaX))
        {
	gvo->reportTargetStrand = TRUE;
	gfAlignTransTrans(&conn, tSeqDir, &seq, FALSE, minScore, tFileCache, 
		gvo, qType == gftRnaX);
	if (qType == gftDnaX)
	    {
	    /* Second pass on the reverse complement over a fresh connection.
	     * NOTE(review): the align routines take &conn, presumably so they
	     * can close and invalidate it themselves - confirm.  The guard
	     * keeps the explicit close harmless if they do. */
	    reverseComplement(seq.dna, seq.size);
	    if (conn >= 0)
		close(conn);
	    conn = gfConnect(hostName, portName);
	    gfAlignTransTrans(&conn, tSeqDir, &seq, TRUE, minScore, tFileCache,
	    	gvo, FALSE);
	    }
	}
    else if ((tType == gftDna || tType == gftRna) && (qType == gftDna || qType == gftRna))
	{
	gfAlignStrand(&conn, tSeqDir, &seq, FALSE, minScore, tFileCache, gvo);
	/* Release the first connection before reconnecting, matching the
	 * translated branch, so a still-open descriptor is not leaked. */
	if (conn >= 0)
	    close(conn);
	conn = gfConnect(hostName, portName);
	reverseComplement(seq.dna, seq.size);
	gfAlignStrand(&conn, tSeqDir, &seq, TRUE,  minScore, tFileCache, gvo);
	}
    else
        {
	errAbort("Comparisons between %s queries and %s databases not yet supported",
		qTypeName, tTypeName);
	}
    gfOutputQuery(gvo, out);
    }
if (out != stdout)
    {
    /* Close before announcing success so that a failed flush (for instance
     * a full disk) is reported instead of a misleading "Output is in". */
    if (fclose(out) != 0)
	errAbort("Error closing output file %s", outName);
    printf("Output is in %s\n", outName);
    }
gfFileCacheFree(&tFileCache);
}