void searchOne(bioSeq *seq, struct genoFind *gf, FILE *f, boolean isProt, struct hash *maskHash, Bits *qMaskBits)
/* Search for seq on either strand in index. */
{
dotOut();
if (isProt)
    {
    searchOneProt(seq, gf, f);
    }
else
    {
    gvo->maskHash = maskHash;
    searchOneStrand(seq, gf, f, FALSE, maskHash, qMaskBits);
    reverseComplement(seq->dna, seq->size);
    searchOneStrand(seq, gf, f, TRUE, maskHash, qMaskBits);
    reverseComplement(seq->dna, seq->size);
    }
gfOutputQuery(gvo, f);
}
struct psl* doDnaAlignment(struct dnaSeq *seq, char *db, char *blatHost, 
	char *port, char *nibDir, struct hash *tFileCache) 
/* get the alignment from the blat host for this sequence */
{
struct psl *pslList = NULL;
int conn =0;
struct tempName pslTn;
FILE *f = NULL;
struct gfOutput *gvo;


if(seq == NULL || db == NULL)
    errAbort("coordConv::doDnaAlignment() - dnaSeq and/or db can't be NULL.");
if(strlen(seq->dna) != seq->size)
    errAbort("coordConv::doDnaAlignment() - there seems to be something fishy about %s: the size doesn't equal the length", seq->name);
/* if there are too many n's it can cause the blat server to hang */
if(strstr(seq->dna, "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn") )
  return NULL; 
makeTempName(&pslTn,"ccR", ".psl");
f = mustOpen(pslTn.forCgi, "w");
gvo = gfOutputPsl(920, FALSE, FALSE, f, FALSE, FALSE);
gfOutputHead(gvo, f);

/* align to genome, both strands */
conn = gfConnect(blatHost, port);
gfAlignStrand(&conn, nibDir, seq, FALSE, 20, tFileCache, gvo);
reverseComplement(seq->dna, seq->size);
conn = gfConnect(blatHost, port);
gfAlignStrand(&conn, nibDir, seq, TRUE, 20 , tFileCache, gvo);
gfOutputQuery(gvo, f);
carefulClose(&f);
pslList = pslLoadAll(pslTn.forCgi);
remove(pslTn.forCgi);
gfOutputFree(&gvo);
return pslList;
}
Exemple #3
0
void blatSeq(char *userSeq, char *organism)
/* Blat sequence user pasted in. */
{
FILE *f;
struct dnaSeq *seqList = NULL, *seq;
struct tempName pslTn, faTn;
int maxSingleSize, maxTotalSize, maxSeqCount;
int minSingleSize = minMatchShown;
char *genome, *db;
char *type = cgiString("type");
char *seqLetters = cloneString(userSeq);
struct serverTable *serve;
int conn;
int oneSize, totalSize = 0, seqCount = 0;
boolean isTx = FALSE;
boolean isTxTx = FALSE;
boolean txTxBoth = FALSE;
struct gfOutput *gvo;
boolean qIsProt = FALSE;
enum gfType qType, tType;
struct hash *tFileCache = gfFileCacheNew();
boolean feelingLucky = cgiBoolean("Lucky");

getDbAndGenome(cart, &db, &genome, oldVars);
if(!feelingLucky)
    cartWebStart(cart, db, "%s BLAT Results",  trackHubSkipHubName(organism));
/* Load user sequence and figure out if it is DNA or protein. */
if (sameWord(type, "DNA"))
    {
    seqList = faSeqListFromMemText(seqLetters, TRUE);
    uToT(seqList);
    isTx = FALSE;
    }
else if (sameWord(type, "translated RNA") || sameWord(type, "translated DNA"))
    {
    seqList = faSeqListFromMemText(seqLetters, TRUE);
    uToT(seqList);
    isTx = TRUE;
    isTxTx = TRUE;
    txTxBoth = sameWord(type, "translated DNA");
    }
else if (sameWord(type, "protein"))
    {
    seqList = faSeqListFromMemText(seqLetters, FALSE);
    isTx = TRUE;
    qIsProt = TRUE;
    }
else 
    {
    seqList = faSeqListFromMemTextRaw(seqLetters);
    isTx = !seqIsDna(seqList);
    if (!isTx)
	{
	for (seq = seqList; seq != NULL; seq = seq->next)
	    {
	    seq->size = dnaFilteredSize(seq->dna);
	    dnaFilter(seq->dna, seq->dna);
	    toLowerN(seq->dna, seq->size);
	    subChar(seq->dna, 'u', 't');
	    }
	}
    else
	{
	for (seq = seqList; seq != NULL; seq = seq->next)
	    {
	    seq->size = aaFilteredSize(seq->dna);
	    aaFilter(seq->dna, seq->dna);
	    toUpperN(seq->dna, seq->size);
	    }
	qIsProt = TRUE;
	}
    }
if (seqList != NULL && seqList->name[0] == 0)
    {
    freeMem(seqList->name);
    seqList->name = cloneString("YourSeq");
    }
trimUniq(seqList);

/* If feeling lucky only do the first on. */
if(feelingLucky && seqList != NULL)
    {
    seqList->next = NULL;
    }

/* Figure out size allowed. */
maxSingleSize = (isTx ? 10000 : 75000);
maxTotalSize = maxSingleSize * 2.5;
#ifdef LOWELAB
maxSeqCount = 200;
#else
maxSeqCount = 25;
#endif

/* Create temporary file to store sequence. */
trashDirFile(&faTn, "hgSs", "hgSs", ".fa");
faWriteAll(faTn.forCgi, seqList);

/* Create a temporary .psl file with the alignments against genome. */
trashDirFile(&pslTn, "hgSs", "hgSs", ".pslx");
f = mustOpen(pslTn.forCgi, "w");
gvo = gfOutputPsl(0, qIsProt, FALSE, f, FALSE, TRUE);
serve = findServer(db, isTx);
/* Write header for extended (possibly protein) psl file. */
if (isTx)
    {
    if (isTxTx)
        {
	qType = gftDnaX;
	tType = gftDnaX;
	}
    else
        {
	qType = gftProt;
	tType = gftDnaX;
	}
    }
else
    {
    qType = gftDna;
    tType = gftDna;
    }
pslxWriteHead(f, qType, tType);

if (qType == gftProt)
    {
    minSingleSize = 14;
    }
else if (qType == gftDnaX)
    {
    minSingleSize = 36;
    }


/* Loop through each sequence. */
for (seq = seqList; seq != NULL; seq = seq->next)
    {
    printf(" "); fflush(stdout);  /* prevent apache cgi timeout by outputting something */
    oneSize = realSeqSize(seq, !isTx);
    if ((seqCount&1) == 0)	// Call bot delay every 2nd time starting with first time
	hgBotDelay();
    if (++seqCount > maxSeqCount)
        {
	warn("More than 25 input sequences, stopping at %s.",
	    seq->name);
	break;
	}
    if (oneSize > maxSingleSize)
	{
	warn("Sequence %s is %d letters long (max is %d), skipping",
	    seq->name, oneSize, maxSingleSize);
	continue;
	}
    if (oneSize < minSingleSize)
        {
	warn("Warning: Sequence %s is only %d letters long (%d is the recommended minimum)", 
		seq->name, oneSize, minSingleSize);
	// we could use "continue;" here to actually enforce skipping, 
	// but let's give the short sequence a chance, it might work.
	// minimum possible length = tileSize+stepSize, so mpl=16 for dna stepSize=5, mpl=10 for protein.
	if (qIsProt && oneSize < 1) // protein does not tolerate oneSize==0
	    continue;
	}
    totalSize += oneSize;
    if (totalSize > maxTotalSize)
        {
	warn("Sequence %s would take us over the %d letter limit, stopping here.",
	     seq->name, maxTotalSize);
	break;
	}
    conn = gfConnect(serve->host, serve->port);
    if (isTx)
	{
	gvo->reportTargetStrand = TRUE;
	if (isTxTx)
	    {
	    gfAlignTransTrans(&conn, serve->nibDir, seq, FALSE, 5, 
	    	tFileCache, gvo, !txTxBoth);
	    if (txTxBoth)
		{
		reverseComplement(seq->dna, seq->size);
		conn = gfConnect(serve->host, serve->port);
		gfAlignTransTrans(&conn, serve->nibDir, seq, TRUE, 5, 
			tFileCache, gvo, FALSE);
		}
	    }
	else
	    {
	    gfAlignTrans(&conn, serve->nibDir, seq, 5, tFileCache, gvo);
	    }
	}
    else
	{
	gfAlignStrand(&conn, serve->nibDir, seq, FALSE, minMatchShown, tFileCache, gvo);
	reverseComplement(seq->dna, seq->size);
	conn = gfConnect(serve->host, serve->port);
	gfAlignStrand(&conn, serve->nibDir, seq, TRUE, minMatchShown, tFileCache, gvo);
	}
    gfOutputQuery(gvo, f);
    }
carefulClose(&f);
showAliPlaces(pslTn.forCgi, faTn.forCgi, serve->db, qType, tType, 
	      organism, feelingLucky);
if(!feelingLucky)
    cartWebEnd();
gfFileCacheFree(&tFileCache);
}
void bigBlat(struct dnaSeq *untransList, int queryCount, char *queryFiles[], char *outFile, boolean transQuery, boolean qIsDna, FILE *out, boolean showStatus)
/* Run query against translated DNA database (3 frames on each strand). */
{
int frame, i;
struct dnaSeq *seq, trimmedSeq;
struct genoFind *gfs[3];
aaSeq *dbSeqLists[3];
struct trans3 *t3List = NULL;
int isRc;
struct lineFile *lf = NULL;
struct hash *t3Hash = NULL;
boolean forceUpper = FALSE;
boolean forceLower = FALSE;
boolean toggle = FALSE;
boolean maskUpper = FALSE;

ZeroVar(&trimmedSeq);
if (showStatus)
    printf("Blatx %d sequences in database, %d files in query\n", slCount(untransList), queryCount);

/* Figure out how to manage query case.  Proteins want to be in
 * upper case, generally, nucleotides in lower case.  But there
 * may be repeatMasking based on case as well. */
if (transQuery)
    {
    if (qMask == NULL)
       forceLower = TRUE;
    else
       {
       maskUpper = TRUE;
       toggle = !sameString(qMask, "upper");
       }
    }
else
    {
    forceUpper = TRUE;
    }

if (gvo->fileHead != NULL)
    gvo->fileHead(gvo, out);

for (isRc = FALSE; isRc <= 1; ++isRc)
    {
    /* Initialize local pointer arrays to NULL to prevent surprises. */
    for (frame = 0; frame < 3; ++frame)
	{
	gfs[frame] = NULL;
	dbSeqLists[frame] = NULL;
	}

    t3List = seqListToTrans3List(untransList, dbSeqLists, &t3Hash);
    for (frame = 0; frame < 3; ++frame)
	{
	gfs[frame] = gfIndexSeq(dbSeqLists[frame], minMatch, maxGap, tileSize, 
		repMatch, ooc, TRUE, oneOff, FALSE, stepSize);
	}

    for (i=0; i<queryCount; ++i)
        {
	aaSeq qSeq;

	lf = lineFileOpen(queryFiles[i], TRUE);
	while (faMixedSpeedReadNext(lf, &qSeq.dna, &qSeq.size, &qSeq.name))
	    {
	    dotOut();
	    /* Put it into right case and optionally mask on case. */
	    if (forceLower)
	        toLowerN(qSeq.dna, qSeq.size);
	    else if (forceUpper)
	        toUpperN(qSeq.dna, qSeq.size);
	    else if (maskUpper)
	        {
		if (toggle)
		    toggleCase(qSeq.dna, qSeq.size);
		upperToN(qSeq.dna, qSeq.size);
		}
	    if (qSeq.size > qWarnSize)
	        {
		warn("Query sequence %s has size %d, it might take a while.",
		     qSeq.name, qSeq.size);
		}
	    trimSeq(&qSeq, &trimmedSeq);
	    if (transQuery)
	        transTripleSearch(&trimmedSeq, gfs, t3Hash, isRc, qIsDna, out);
	    else
		tripleSearch(&trimmedSeq, gfs, t3Hash, isRc, out);
	    gfOutputQuery(gvo, out);
	    }
	lineFileClose(&lf);
	}

    /* Clean up time. */
    trans3FreeList(&t3List);
    freeHash(&t3Hash);
    for (frame = 0; frame < 3; ++frame)
	{
	genoFindFree(&gfs[frame]);
	}

    for (seq = untransList; seq != NULL; seq = seq->next)
        {
	reverseComplement(seq->dna, seq->size);
	}
    }
carefulClose(&out);
}
Exemple #5
0
void gfClient(char *hostName, char *portName, char *tSeqDir, char *inName, 
	char *outName, char *tTypeName, char *qTypeName)
/* gfClient - A client for the genomic finding program that produces a .psl file. */
{
struct lineFile *lf = lineFileOpen(inName, TRUE);
static bioSeq seq;
FILE *out = mustOpen(outName, "w");
enum gfType qType = gfTypeFromName(qTypeName);
enum gfType tType = gfTypeFromName(tTypeName);
int dotMod = 0;
char databaseName[256];
struct hash *tFileCache = gfFileCacheNew();

snprintf(databaseName, sizeof(databaseName), "%s:%s", hostName, portName);

gvo = gfOutputAny(outputFormat,  round(minIdentity*10), qType == gftProt, tType == gftProt,
	optionExists("nohead"), databaseName, 23, 3.0e9, minIdentity, out);
gfOutputHead(gvo, out);
while (faSomeSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name, qType != gftProt))
    {
    int conn = gfConnect(hostName, portName);
    if (dots != 0)
        {
	if (++dotMod >= dots)
	    {
	    dotMod = 0;
	    fputc('.', stdout);
	    fflush(stdout);
	    }
	}
    if (qType == gftProt && (tType == gftDnaX || tType == gftRnaX))
        {
	gvo->reportTargetStrand = TRUE;
	gfAlignTrans(&conn, tSeqDir, &seq, minScore, tFileCache, gvo);
	}
    else if ((qType == gftRnaX || qType == gftDnaX) && (tType == gftDnaX || tType == gftRnaX))
        {
	gvo->reportTargetStrand = TRUE;
	gfAlignTransTrans(&conn, tSeqDir, &seq, FALSE, minScore, tFileCache, 
		gvo, qType == gftRnaX);
	if (qType == gftDnaX)
	    {
	    reverseComplement(seq.dna, seq.size);
	    close(conn);
	    conn = gfConnect(hostName, portName);
	    gfAlignTransTrans(&conn, tSeqDir, &seq, TRUE, minScore, tFileCache,
	    	gvo, FALSE);
	    }
	}
    else if ((tType == gftDna || tType == gftRna) && (qType == gftDna || qType == gftRna))
	{
	gfAlignStrand(&conn, tSeqDir, &seq, FALSE, minScore, tFileCache, gvo);
	conn = gfConnect(hostName, portName);
	reverseComplement(seq.dna, seq.size);
	gfAlignStrand(&conn, tSeqDir, &seq, TRUE,  minScore, tFileCache, gvo);
	}
    else
        {
	errAbort("Comparisons between %s queries and %s databases not yet supported",
		qTypeName, tTypeName);
	}
    gfOutputQuery(gvo, out);
    }
if (out != stdout)
    printf("Output is in %s\n", outName);
gfFileCacheFree(&tFileCache);
}