struct gfOutput *gfOutputAny(char *format, int goodPpt, boolean qIsProt, boolean tIsProt, boolean noHead, char *databaseName, int databaseSeqCount, double databaseLetters, double minIdentity, FILE *f) /* Initialize output in a variety of formats in file or memory. * Parameters: * format - either 'psl', 'pslx', 'sim4', 'blast', 'wublast', 'axt', 'xml' * goodPpt - minimum identity of alignments to output in parts per thousand * qIsProt - true if query side is a protein. * tIsProt - true if target (database) side is a protein. * noHead - if true suppress header in psl/pslx output. * databaseName - name of database. Only used for blast output * databaseSeq - number of sequences in database - only for blast * databaseLetters - number of bases/aas in database - only blast * minIdentity - minimum identity - only blast * FILE *f - file. */ { struct gfOutput *out = NULL; static char *blastTypes[] = {"blast", "wublast", "blast8", "blast9", "xml"}; if (format == NULL) format = "psl"; if (sameWord(format, "psl")) out = gfOutputPsl(goodPpt, qIsProt, tIsProt, f, FALSE, noHead); else if (sameWord(format, "pslx")) out = gfOutputPsl(goodPpt, qIsProt, tIsProt, f, TRUE, noHead); else if (sameWord(format, "sim4")) out = gfOutputSim4(goodPpt, qIsProt, tIsProt, databaseName); else if (stringArrayIx(format, blastTypes, ArraySize(blastTypes)) >= 0) out = gfOutputBlast(goodPpt, qIsProt, tIsProt, databaseName, databaseSeqCount, databaseLetters, format, minIdentity, f); else if (sameWord(format, "axt")) out = gfOutputAxt(goodPpt, qIsProt, tIsProt, f); else if (sameWord(format, "maf")) out = gfOutputMaf(goodPpt, qIsProt, tIsProt, f); else errAbort("Unrecognized output format '%s'", format); return out; }
struct psl* doDnaAlignment(struct dnaSeq *seq, char *db, char *blatHost, char *port, char *nibDir, struct hash *tFileCache) /* get the alignment from the blat host for this sequence */ { struct psl *pslList = NULL; int conn =0; struct tempName pslTn; FILE *f = NULL; struct gfOutput *gvo; if(seq == NULL || db == NULL) errAbort("coordConv::doDnaAlignment() - dnaSeq and/or db can't be NULL."); if(strlen(seq->dna) != seq->size) errAbort("coordConv::doDnaAlignment() - there seems to be something fishy about %s: the size doesn't equal the length", seq->name); /* if there are too many n's it can cause the blat server to hang */ if(strstr(seq->dna, "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn") ) return NULL; makeTempName(&pslTn,"ccR", ".psl"); f = mustOpen(pslTn.forCgi, "w"); gvo = gfOutputPsl(920, FALSE, FALSE, f, FALSE, FALSE); gfOutputHead(gvo, f); /* align to genome, both strands */ conn = gfConnect(blatHost, port); gfAlignStrand(&conn, nibDir, seq, FALSE, 20, tFileCache, gvo); reverseComplement(seq->dna, seq->size); conn = gfConnect(blatHost, port); gfAlignStrand(&conn, nibDir, seq, TRUE, 20 , tFileCache, gvo); gfOutputQuery(gvo, f); carefulClose(&f); pslList = pslLoadAll(pslTn.forCgi); remove(pslTn.forCgi); gfOutputFree(&gvo); return pslList; }
void blatSeq(char *userSeq, char *organism) /* Blat sequence user pasted in. */ { FILE *f; struct dnaSeq *seqList = NULL, *seq; struct tempName pslTn, faTn; int maxSingleSize, maxTotalSize, maxSeqCount; int minSingleSize = minMatchShown; char *genome, *db; char *type = cgiString("type"); char *seqLetters = cloneString(userSeq); struct serverTable *serve; int conn; int oneSize, totalSize = 0, seqCount = 0; boolean isTx = FALSE; boolean isTxTx = FALSE; boolean txTxBoth = FALSE; struct gfOutput *gvo; boolean qIsProt = FALSE; enum gfType qType, tType; struct hash *tFileCache = gfFileCacheNew(); boolean feelingLucky = cgiBoolean("Lucky"); getDbAndGenome(cart, &db, &genome, oldVars); if(!feelingLucky) cartWebStart(cart, db, "%s BLAT Results", trackHubSkipHubName(organism)); /* Load user sequence and figure out if it is DNA or protein. */ if (sameWord(type, "DNA")) { seqList = faSeqListFromMemText(seqLetters, TRUE); uToT(seqList); isTx = FALSE; } else if (sameWord(type, "translated RNA") || sameWord(type, "translated DNA")) { seqList = faSeqListFromMemText(seqLetters, TRUE); uToT(seqList); isTx = TRUE; isTxTx = TRUE; txTxBoth = sameWord(type, "translated DNA"); } else if (sameWord(type, "protein")) { seqList = faSeqListFromMemText(seqLetters, FALSE); isTx = TRUE; qIsProt = TRUE; } else { seqList = faSeqListFromMemTextRaw(seqLetters); isTx = !seqIsDna(seqList); if (!isTx) { for (seq = seqList; seq != NULL; seq = seq->next) { seq->size = dnaFilteredSize(seq->dna); dnaFilter(seq->dna, seq->dna); toLowerN(seq->dna, seq->size); subChar(seq->dna, 'u', 't'); } } else { for (seq = seqList; seq != NULL; seq = seq->next) { seq->size = aaFilteredSize(seq->dna); aaFilter(seq->dna, seq->dna); toUpperN(seq->dna, seq->size); } qIsProt = TRUE; } } if (seqList != NULL && seqList->name[0] == 0) { freeMem(seqList->name); seqList->name = cloneString("YourSeq"); } trimUniq(seqList); /* If feeling lucky only do the first on. */ if(feelingLucky && seqList != NULL) { seqList->next = NULL; } /* Figure out size allowed. */ maxSingleSize = (isTx ? 10000 : 75000); maxTotalSize = maxSingleSize * 2.5; #ifdef LOWELAB maxSeqCount = 200; #else maxSeqCount = 25; #endif /* Create temporary file to store sequence. */ trashDirFile(&faTn, "hgSs", "hgSs", ".fa"); faWriteAll(faTn.forCgi, seqList); /* Create a temporary .psl file with the alignments against genome. */ trashDirFile(&pslTn, "hgSs", "hgSs", ".pslx"); f = mustOpen(pslTn.forCgi, "w"); gvo = gfOutputPsl(0, qIsProt, FALSE, f, FALSE, TRUE); serve = findServer(db, isTx); /* Write header for extended (possibly protein) psl file. */ if (isTx) { if (isTxTx) { qType = gftDnaX; tType = gftDnaX; } else { qType = gftProt; tType = gftDnaX; } } else { qType = gftDna; tType = gftDna; } pslxWriteHead(f, qType, tType); if (qType == gftProt) { minSingleSize = 14; } else if (qType == gftDnaX) { minSingleSize = 36; } /* Loop through each sequence. */ for (seq = seqList; seq != NULL; seq = seq->next) { printf(" "); fflush(stdout); /* prevent apache cgi timeout by outputting something */ oneSize = realSeqSize(seq, !isTx); if ((seqCount&1) == 0) // Call bot delay every 2nd time starting with first time hgBotDelay(); if (++seqCount > maxSeqCount) { warn("More than 25 input sequences, stopping at %s.", seq->name); break; } if (oneSize > maxSingleSize) { warn("Sequence %s is %d letters long (max is %d), skipping", seq->name, oneSize, maxSingleSize); continue; } if (oneSize < minSingleSize) { warn("Warning: Sequence %s is only %d letters long (%d is the recommended minimum)", seq->name, oneSize, minSingleSize); // we could use "continue;" here to actually enforce skipping, // but let's give the short sequence a chance, it might work. // minimum possible length = tileSize+stepSize, so mpl=16 for dna stepSize=5, mpl=10 for protein. if (qIsProt && oneSize < 1) // protein does not tolerate oneSize==0 continue; } totalSize += oneSize; if (totalSize > maxTotalSize) { warn("Sequence %s would take us over the %d letter limit, stopping here.", seq->name, maxTotalSize); break; } conn = gfConnect(serve->host, serve->port); if (isTx) { gvo->reportTargetStrand = TRUE; if (isTxTx) { gfAlignTransTrans(&conn, serve->nibDir, seq, FALSE, 5, tFileCache, gvo, !txTxBoth); if (txTxBoth) { reverseComplement(seq->dna, seq->size); conn = gfConnect(serve->host, serve->port); gfAlignTransTrans(&conn, serve->nibDir, seq, TRUE, 5, tFileCache, gvo, FALSE); } } else { gfAlignTrans(&conn, serve->nibDir, seq, 5, tFileCache, gvo); } } else { gfAlignStrand(&conn, serve->nibDir, seq, FALSE, minMatchShown, tFileCache, gvo); reverseComplement(seq->dna, seq->size); conn = gfConnect(serve->host, serve->port); gfAlignStrand(&conn, serve->nibDir, seq, TRUE, minMatchShown, tFileCache, gvo); } gfOutputQuery(gvo, f); } carefulClose(&f); showAliPlaces(pslTn.forCgi, faTn.forCgi, serve->db, qType, tType, organism, feelingLucky); if(!feelingLucky) cartWebEnd(); gfFileCacheFree(&tFileCache); }