int main(int argc, char *argv[])
/* Process command line into global variables and call blat. */
{
boolean tIsProtLike, qIsProtLike;

#ifdef DEBUG
{
char *cmd = "blat hCrea.geno hCrea.mrna foo.psl -t=dnax -q=rnax";
char *words[16];

printf("Debugging parameters\n");
cmd = cloneString(cmd);
argc = chopLine(cmd, words);
argv = words;
}
#endif /* DEBUG */

optionInit(&argc, argv, options);
if (argc != 4)
    usage();

/* Get database and query sequence types and make sure they are
 * legal and compatable. */
if (optionExists("prot"))
    qType = tType = gftProt;
if (optionExists("t"))
    tType = gfTypeFromName(optionVal("t", NULL));
trimA = optionExists("trimA") || optionExists("trima");
trimT = optionExists("trimT") || optionExists("trimt");
trimHardA = optionExists("trimHardA");
switch (tType)
    {
    case gftProt:
    case gftDnaX:
        tIsProtLike = TRUE;
	break;
    case gftDna:
        tIsProtLike = FALSE;
	break;
    default:
	tIsProtLike = FALSE;
        errAbort("Illegal value for 't' parameter");
	break;
    }
if (optionExists("q"))
    qType = gfTypeFromName(optionVal("q", NULL));
if (qType == gftRnaX || qType == gftRna)
    trimA = TRUE;
if (optionExists("noTrimA"))
    trimA = FALSE;
switch (qType)
    {
    case gftProt:
    case gftDnaX:
    case gftRnaX:
	minIdentity = 25;
        qIsProtLike = TRUE;
	break;
    default:
        qIsProtLike = FALSE;
	break;
    }
if ((tIsProtLike ^ qIsProtLike) != 0)
    errAbort("t and q must both be either protein or dna");

/* Set default tile size for protein-based comparisons. */
if (tIsProtLike)
    {
    tileSize = 5;
    minMatch = 1;
    oneOff = FALSE;
    maxGap = 0;
    }

/* Get tile size and related parameters from user and make sure
 * they are within range. */
tileSize = optionInt("tileSize", tileSize);
stepSize = optionInt("stepSize", tileSize);
minMatch = optionInt("minMatch", minMatch);
oneOff = optionExists("oneOff");
fastMap = optionExists("fastMap");
minScore = optionInt("minScore", minScore);
maxGap = optionInt("maxGap", maxGap);
minRepDivergence = optionFloat("minRepDivergence", minRepDivergence);
minIdentity = optionFloat("minIdentity", minIdentity);
gfCheckTileSize(tileSize, tIsProtLike);
if (minMatch < 0)
    errAbort("minMatch must be at least 1");
if (maxGap > 100)
    errAbort("maxGap must be less than 100");



/* Set repMatch parameter from command line, or
 * to reasonable value that depends on tile size. */
if (optionExists("repMatch"))
    repMatch = optionInt("repMatch", repMatch);
else
    repMatch = gfDefaultRepMatch(tileSize, stepSize, tIsProtLike);

/* Gather last few command line options. */
noHead = optionExists("noHead");
ooc = optionVal("ooc", NULL);
makeOoc = optionVal("makeOoc", NULL);
mask = optionVal("mask", NULL);
qMask = optionVal("qMask", NULL);
repeats = optionVal("repeats", NULL);
if (repeats != NULL && mask != NULL && differentString(repeats, mask))
    errAbort("The -mask and -repeat settings disagree.  "
             "You can just omit -repeat if -mask is on");
if (mask != NULL)	/* Mask setting will also set repeats. */
    repeats = mask;
outputFormat = optionVal("out", outputFormat);
dotEvery = optionInt("dots", 0);
/* set global for fuzzy find functions */
setFfIntronMax(optionInt("maxIntron", ffIntronMaxDefault));
setFfExtendThroughN(optionExists("extendThroughN"));


/* Call routine that does the work. */
blat(argv[1], argv[2], argv[3]);
return 0;
}
Esempio n. 2
0
void gfClient(char *hostName, char *portName, char *tSeqDir, char *inName, 
	char *outName, char *tTypeName, char *qTypeName)
/* gfClient - A client for the genomic finding program that produces a .psl file. */
{
struct lineFile *lf = lineFileOpen(inName, TRUE);
static bioSeq seq;
FILE *out = mustOpen(outName, "w");
enum gfType qType = gfTypeFromName(qTypeName);
enum gfType tType = gfTypeFromName(tTypeName);
int dotMod = 0;
char databaseName[256];
struct hash *tFileCache = gfFileCacheNew();

snprintf(databaseName, sizeof(databaseName), "%s:%s", hostName, portName);

gvo = gfOutputAny(outputFormat,  round(minIdentity*10), qType == gftProt, tType == gftProt,
	optionExists("nohead"), databaseName, 23, 3.0e9, minIdentity, out);
gfOutputHead(gvo, out);
while (faSomeSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name, qType != gftProt))
    {
    int conn = gfConnect(hostName, portName);
    if (dots != 0)
        {
	if (++dotMod >= dots)
	    {
	    dotMod = 0;
	    fputc('.', stdout);
	    fflush(stdout);
	    }
	}
    if (qType == gftProt && (tType == gftDnaX || tType == gftRnaX))
        {
	gvo->reportTargetStrand = TRUE;
	gfAlignTrans(&conn, tSeqDir, &seq, minScore, tFileCache, gvo);
	}
    else if ((qType == gftRnaX || qType == gftDnaX) && (tType == gftDnaX || tType == gftRnaX))
        {
	gvo->reportTargetStrand = TRUE;
	gfAlignTransTrans(&conn, tSeqDir, &seq, FALSE, minScore, tFileCache, 
		gvo, qType == gftRnaX);
	if (qType == gftDnaX)
	    {
	    reverseComplement(seq.dna, seq.size);
	    close(conn);
	    conn = gfConnect(hostName, portName);
	    gfAlignTransTrans(&conn, tSeqDir, &seq, TRUE, minScore, tFileCache,
	    	gvo, FALSE);
	    }
	}
    else if ((tType == gftDna || tType == gftRna) && (qType == gftDna || qType == gftRna))
	{
	gfAlignStrand(&conn, tSeqDir, &seq, FALSE, minScore, tFileCache, gvo);
	conn = gfConnect(hostName, portName);
	reverseComplement(seq.dna, seq.size);
	gfAlignStrand(&conn, tSeqDir, &seq, TRUE,  minScore, tFileCache, gvo);
	}
    else
        {
	errAbort("Comparisons between %s queries and %s databases not yet supported",
		qTypeName, tTypeName);
	}
    gfOutputQuery(gvo, out);
    }
if (out != stdout)
    printf("Output is in %s\n", outName);
gfFileCacheFree(&tFileCache);
}