struct psl* doDnaAlignment(struct dnaSeq *seq, char *db, char *blatHost, 
	char *port, char *nibDir, struct hash *tFileCache) 
/* get the alignment from the blat host for this sequence */
{
struct psl *pslList = NULL;
int conn =0;
struct tempName pslTn;
FILE *f = NULL;
struct gfOutput *gvo;


if(seq == NULL || db == NULL)
    errAbort("coordConv::doDnaAlignment() - dnaSeq and/or db can't be NULL.");
if(strlen(seq->dna) != seq->size)
    errAbort("coordConv::doDnaAlignment() - there seems to be something fishy about %s: the size doesn't equal the length", seq->name);
/* if there are too many n's it can cause the blat server to hang */
if(strstr(seq->dna, "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn") )
  return NULL; 
makeTempName(&pslTn,"ccR", ".psl");
f = mustOpen(pslTn.forCgi, "w");
gvo = gfOutputPsl(920, FALSE, FALSE, f, FALSE, FALSE);
gfOutputHead(gvo, f);

/* align to genome, both strands */
conn = gfConnect(blatHost, port);
gfAlignStrand(&conn, nibDir, seq, FALSE, 20, tFileCache, gvo);
reverseComplement(seq->dna, seq->size);
conn = gfConnect(blatHost, port);
gfAlignStrand(&conn, nibDir, seq, TRUE, 20 , tFileCache, gvo);
gfOutputQuery(gvo, f);
carefulClose(&f);
pslList = pslLoadAll(pslTn.forCgi);
remove(pslTn.forCgi);
gfOutputFree(&gvo);
return pslList;
}
Beispiel #2
0
void blatSeq(char *userSeq, char *organism)
/* Blat sequence user pasted in. */
{
FILE *f;
struct dnaSeq *seqList = NULL, *seq;
struct tempName pslTn, faTn;
int maxSingleSize, maxTotalSize, maxSeqCount;
int minSingleSize = minMatchShown;
char *genome, *db;
char *type = cgiString("type");
char *seqLetters = cloneString(userSeq);
struct serverTable *serve;
int conn;
int oneSize, totalSize = 0, seqCount = 0;
boolean isTx = FALSE;
boolean isTxTx = FALSE;
boolean txTxBoth = FALSE;
struct gfOutput *gvo;
boolean qIsProt = FALSE;
enum gfType qType, tType;
struct hash *tFileCache = gfFileCacheNew();
boolean feelingLucky = cgiBoolean("Lucky");

getDbAndGenome(cart, &db, &genome, oldVars);
if(!feelingLucky)
    cartWebStart(cart, db, "%s BLAT Results",  trackHubSkipHubName(organism));
/* Load user sequence and figure out if it is DNA or protein. */
if (sameWord(type, "DNA"))
    {
    seqList = faSeqListFromMemText(seqLetters, TRUE);
    uToT(seqList);
    isTx = FALSE;
    }
else if (sameWord(type, "translated RNA") || sameWord(type, "translated DNA"))
    {
    seqList = faSeqListFromMemText(seqLetters, TRUE);
    uToT(seqList);
    isTx = TRUE;
    isTxTx = TRUE;
    txTxBoth = sameWord(type, "translated DNA");
    }
else if (sameWord(type, "protein"))
    {
    seqList = faSeqListFromMemText(seqLetters, FALSE);
    isTx = TRUE;
    qIsProt = TRUE;
    }
else 
    {
    seqList = faSeqListFromMemTextRaw(seqLetters);
    isTx = !seqIsDna(seqList);
    if (!isTx)
	{
	for (seq = seqList; seq != NULL; seq = seq->next)
	    {
	    seq->size = dnaFilteredSize(seq->dna);
	    dnaFilter(seq->dna, seq->dna);
	    toLowerN(seq->dna, seq->size);
	    subChar(seq->dna, 'u', 't');
	    }
	}
    else
	{
	for (seq = seqList; seq != NULL; seq = seq->next)
	    {
	    seq->size = aaFilteredSize(seq->dna);
	    aaFilter(seq->dna, seq->dna);
	    toUpperN(seq->dna, seq->size);
	    }
	qIsProt = TRUE;
	}
    }
if (seqList != NULL && seqList->name[0] == 0)
    {
    freeMem(seqList->name);
    seqList->name = cloneString("YourSeq");
    }
trimUniq(seqList);

/* If feeling lucky only do the first on. */
if(feelingLucky && seqList != NULL)
    {
    seqList->next = NULL;
    }

/* Figure out size allowed. */
maxSingleSize = (isTx ? 10000 : 75000);
maxTotalSize = maxSingleSize * 2.5;
#ifdef LOWELAB
maxSeqCount = 200;
#else
maxSeqCount = 25;
#endif

/* Create temporary file to store sequence. */
trashDirFile(&faTn, "hgSs", "hgSs", ".fa");
faWriteAll(faTn.forCgi, seqList);

/* Create a temporary .psl file with the alignments against genome. */
trashDirFile(&pslTn, "hgSs", "hgSs", ".pslx");
f = mustOpen(pslTn.forCgi, "w");
gvo = gfOutputPsl(0, qIsProt, FALSE, f, FALSE, TRUE);
serve = findServer(db, isTx);
/* Write header for extended (possibly protein) psl file. */
if (isTx)
    {
    if (isTxTx)
        {
	qType = gftDnaX;
	tType = gftDnaX;
	}
    else
        {
	qType = gftProt;
	tType = gftDnaX;
	}
    }
else
    {
    qType = gftDna;
    tType = gftDna;
    }
pslxWriteHead(f, qType, tType);

if (qType == gftProt)
    {
    minSingleSize = 14;
    }
else if (qType == gftDnaX)
    {
    minSingleSize = 36;
    }


/* Loop through each sequence. */
for (seq = seqList; seq != NULL; seq = seq->next)
    {
    printf(" "); fflush(stdout);  /* prevent apache cgi timeout by outputting something */
    oneSize = realSeqSize(seq, !isTx);
    if ((seqCount&1) == 0)	// Call bot delay every 2nd time starting with first time
	hgBotDelay();
    if (++seqCount > maxSeqCount)
        {
	warn("More than 25 input sequences, stopping at %s.",
	    seq->name);
	break;
	}
    if (oneSize > maxSingleSize)
	{
	warn("Sequence %s is %d letters long (max is %d), skipping",
	    seq->name, oneSize, maxSingleSize);
	continue;
	}
    if (oneSize < minSingleSize)
        {
	warn("Warning: Sequence %s is only %d letters long (%d is the recommended minimum)", 
		seq->name, oneSize, minSingleSize);
	// we could use "continue;" here to actually enforce skipping, 
	// but let's give the short sequence a chance, it might work.
	// minimum possible length = tileSize+stepSize, so mpl=16 for dna stepSize=5, mpl=10 for protein.
	if (qIsProt && oneSize < 1) // protein does not tolerate oneSize==0
	    continue;
	}
    totalSize += oneSize;
    if (totalSize > maxTotalSize)
        {
	warn("Sequence %s would take us over the %d letter limit, stopping here.",
	     seq->name, maxTotalSize);
	break;
	}
    conn = gfConnect(serve->host, serve->port);
    if (isTx)
	{
	gvo->reportTargetStrand = TRUE;
	if (isTxTx)
	    {
	    gfAlignTransTrans(&conn, serve->nibDir, seq, FALSE, 5, 
	    	tFileCache, gvo, !txTxBoth);
	    if (txTxBoth)
		{
		reverseComplement(seq->dna, seq->size);
		conn = gfConnect(serve->host, serve->port);
		gfAlignTransTrans(&conn, serve->nibDir, seq, TRUE, 5, 
			tFileCache, gvo, FALSE);
		}
	    }
	else
	    {
	    gfAlignTrans(&conn, serve->nibDir, seq, 5, tFileCache, gvo);
	    }
	}
    else
	{
	gfAlignStrand(&conn, serve->nibDir, seq, FALSE, minMatchShown, tFileCache, gvo);
	reverseComplement(seq->dna, seq->size);
	conn = gfConnect(serve->host, serve->port);
	gfAlignStrand(&conn, serve->nibDir, seq, TRUE, minMatchShown, tFileCache, gvo);
	}
    gfOutputQuery(gvo, f);
    }
carefulClose(&f);
showAliPlaces(pslTn.forCgi, faTn.forCgi, serve->db, qType, tType, 
	      organism, feelingLucky);
if(!feelingLucky)
    cartWebEnd();
gfFileCacheFree(&tFileCache);
}
Beispiel #3
0
void gfClient(char *hostName, char *portName, char *tSeqDir, char *inName, 
	char *outName, char *tTypeName, char *qTypeName)
/* gfClient - A client for the genomic finding program that produces a .psl file. */
{
struct lineFile *lf = lineFileOpen(inName, TRUE);
static bioSeq seq;
FILE *out = mustOpen(outName, "w");
enum gfType qType = gfTypeFromName(qTypeName);
enum gfType tType = gfTypeFromName(tTypeName);
int dotMod = 0;
char databaseName[256];
struct hash *tFileCache = gfFileCacheNew();

snprintf(databaseName, sizeof(databaseName), "%s:%s", hostName, portName);

gvo = gfOutputAny(outputFormat,  round(minIdentity*10), qType == gftProt, tType == gftProt,
	optionExists("nohead"), databaseName, 23, 3.0e9, minIdentity, out);
gfOutputHead(gvo, out);
while (faSomeSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name, qType != gftProt))
    {
    int conn = gfConnect(hostName, portName);
    if (dots != 0)
        {
	if (++dotMod >= dots)
	    {
	    dotMod = 0;
	    fputc('.', stdout);
	    fflush(stdout);
	    }
	}
    if (qType == gftProt && (tType == gftDnaX || tType == gftRnaX))
        {
	gvo->reportTargetStrand = TRUE;
	gfAlignTrans(&conn, tSeqDir, &seq, minScore, tFileCache, gvo);
	}
    else if ((qType == gftRnaX || qType == gftDnaX) && (tType == gftDnaX || tType == gftRnaX))
        {
	gvo->reportTargetStrand = TRUE;
	gfAlignTransTrans(&conn, tSeqDir, &seq, FALSE, minScore, tFileCache, 
		gvo, qType == gftRnaX);
	if (qType == gftDnaX)
	    {
	    reverseComplement(seq.dna, seq.size);
	    close(conn);
	    conn = gfConnect(hostName, portName);
	    gfAlignTransTrans(&conn, tSeqDir, &seq, TRUE, minScore, tFileCache,
	    	gvo, FALSE);
	    }
	}
    else if ((tType == gftDna || tType == gftRna) && (qType == gftDna || qType == gftRna))
	{
	gfAlignStrand(&conn, tSeqDir, &seq, FALSE, minScore, tFileCache, gvo);
	conn = gfConnect(hostName, portName);
	reverseComplement(seq.dna, seq.size);
	gfAlignStrand(&conn, tSeqDir, &seq, TRUE,  minScore, tFileCache, gvo);
	}
    else
        {
	errAbort("Comparisons between %s queries and %s databases not yet supported",
		qTypeName, tTypeName);
	}
    gfOutputQuery(gvo, out);
    }
if (out != stdout)
    printf("Output is in %s\n", outName);
gfFileCacheFree(&tFileCache);
}