int main(int argc, char *argv[]) /* Process command line into global variables and call blat. */ { boolean tIsProtLike, qIsProtLike; #ifdef DEBUG { char *cmd = "blat hCrea.geno hCrea.mrna foo.psl -t=dnax -q=rnax"; char *words[16]; printf("Debugging parameters\n"); cmd = cloneString(cmd); argc = chopLine(cmd, words); argv = words; } #endif /* DEBUG */ optionInit(&argc, argv, options); if (argc != 4) usage(); /* Get database and query sequence types and make sure they are * legal and compatable. */ if (optionExists("prot")) qType = tType = gftProt; if (optionExists("t")) tType = gfTypeFromName(optionVal("t", NULL)); trimA = optionExists("trimA") || optionExists("trima"); trimT = optionExists("trimT") || optionExists("trimt"); trimHardA = optionExists("trimHardA"); switch (tType) { case gftProt: case gftDnaX: tIsProtLike = TRUE; break; case gftDna: tIsProtLike = FALSE; break; default: tIsProtLike = FALSE; errAbort("Illegal value for 't' parameter"); break; } if (optionExists("q")) qType = gfTypeFromName(optionVal("q", NULL)); if (qType == gftRnaX || qType == gftRna) trimA = TRUE; if (optionExists("noTrimA")) trimA = FALSE; switch (qType) { case gftProt: case gftDnaX: case gftRnaX: minIdentity = 25; qIsProtLike = TRUE; break; default: qIsProtLike = FALSE; break; } if ((tIsProtLike ^ qIsProtLike) != 0) errAbort("t and q must both be either protein or dna"); /* Set default tile size for protein-based comparisons. */ if (tIsProtLike) { tileSize = 5; minMatch = 1; oneOff = FALSE; maxGap = 0; } /* Get tile size and related parameters from user and make sure * they are within range. */ tileSize = optionInt("tileSize", tileSize); stepSize = optionInt("stepSize", tileSize); minMatch = optionInt("minMatch", minMatch); oneOff = optionExists("oneOff"); fastMap = optionExists("fastMap"); minScore = optionInt("minScore", minScore); maxGap = optionInt("maxGap", maxGap); minRepDivergence = optionFloat("minRepDivergence", minRepDivergence); minIdentity = optionFloat("minIdentity", minIdentity); gfCheckTileSize(tileSize, tIsProtLike); if (minMatch < 0) errAbort("minMatch must be at least 1"); if (maxGap > 100) errAbort("maxGap must be less than 100"); /* Set repMatch parameter from command line, or * to reasonable value that depends on tile size. */ if (optionExists("repMatch")) repMatch = optionInt("repMatch", repMatch); else repMatch = gfDefaultRepMatch(tileSize, stepSize, tIsProtLike); /* Gather last few command line options. */ noHead = optionExists("noHead"); ooc = optionVal("ooc", NULL); makeOoc = optionVal("makeOoc", NULL); mask = optionVal("mask", NULL); qMask = optionVal("qMask", NULL); repeats = optionVal("repeats", NULL); if (repeats != NULL && mask != NULL && differentString(repeats, mask)) errAbort("The -mask and -repeat settings disagree. " "You can just omit -repeat if -mask is on"); if (mask != NULL) /* Mask setting will also set repeats. */ repeats = mask; outputFormat = optionVal("out", outputFormat); dotEvery = optionInt("dots", 0); /* set global for fuzzy find functions */ setFfIntronMax(optionInt("maxIntron", ffIntronMaxDefault)); setFfExtendThroughN(optionExists("extendThroughN")); /* Call routine that does the work. */ blat(argv[1], argv[2], argv[3]); return 0; }
void gfClient(char *hostName, char *portName, char *tSeqDir, char *inName, char *outName, char *tTypeName, char *qTypeName) /* gfClient - A client for the genomic finding program that produces a .psl file. */ { struct lineFile *lf = lineFileOpen(inName, TRUE); static bioSeq seq; FILE *out = mustOpen(outName, "w"); enum gfType qType = gfTypeFromName(qTypeName); enum gfType tType = gfTypeFromName(tTypeName); int dotMod = 0; char databaseName[256]; struct hash *tFileCache = gfFileCacheNew(); snprintf(databaseName, sizeof(databaseName), "%s:%s", hostName, portName); gvo = gfOutputAny(outputFormat, round(minIdentity*10), qType == gftProt, tType == gftProt, optionExists("nohead"), databaseName, 23, 3.0e9, minIdentity, out); gfOutputHead(gvo, out); while (faSomeSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name, qType != gftProt)) { int conn = gfConnect(hostName, portName); if (dots != 0) { if (++dotMod >= dots) { dotMod = 0; fputc('.', stdout); fflush(stdout); } } if (qType == gftProt && (tType == gftDnaX || tType == gftRnaX)) { gvo->reportTargetStrand = TRUE; gfAlignTrans(&conn, tSeqDir, &seq, minScore, tFileCache, gvo); } else if ((qType == gftRnaX || qType == gftDnaX) && (tType == gftDnaX || tType == gftRnaX)) { gvo->reportTargetStrand = TRUE; gfAlignTransTrans(&conn, tSeqDir, &seq, FALSE, minScore, tFileCache, gvo, qType == gftRnaX); if (qType == gftDnaX) { reverseComplement(seq.dna, seq.size); close(conn); conn = gfConnect(hostName, portName); gfAlignTransTrans(&conn, tSeqDir, &seq, TRUE, minScore, tFileCache, gvo, FALSE); } } else if ((tType == gftDna || tType == gftRna) && (qType == gftDna || qType == gftRna)) { gfAlignStrand(&conn, tSeqDir, &seq, FALSE, minScore, tFileCache, gvo); conn = gfConnect(hostName, portName); reverseComplement(seq.dna, seq.size); gfAlignStrand(&conn, tSeqDir, &seq, TRUE, minScore, tFileCache, gvo); } else { errAbort("Comparisons between %s queries and %s databases not yet supported", qTypeName, tTypeName); } gfOutputQuery(gvo, out); } if (out != stdout) printf("Output is in %s\n", outName); gfFileCacheFree(&tFileCache); }