Beispiel #1
0
TwoMotif findMotifInStream(SpotterParams p, std::string streamDir,
						   length_t minMotifLen, length_t maxMotifLen) {
	int numDims = 1;
	Stream dataStream = readStreamFromDir(streamDir, numDims);
	return findMotif(p, &dataStream.data[0], dataStream.data.size(),
					 minMotifLen, maxMotifLen);
}
int main(int argc, char *argv[])
/* Process command line. */
{
int i;
char *cp;
unsigned long long reversed;
size_t maxAlloc;
char asciiAlloc[32];

optionInit(&argc, argv, options);

if (argc < 2)
    usage();

maxAlloc = 2100000000 *
	 (((sizeof(size_t)/4)*(sizeof(size_t)/4)*(sizeof(size_t)/4)));
sprintLongWithCommas(asciiAlloc, (long long) maxAlloc);
verbose(4, "#\tmaxAlloc: %s\n", asciiAlloc);
setMaxAlloc(maxAlloc);
/* produces: size_t is 4 == 2100000000 ~= 2^31 = 2Gb
 *      size_t is 8 = 16800000000 ~= 2^34 = 16 Gb
 */

dnaUtilOpen();

motif = optionVal("motif", NULL);
chr = optionVal("chr", NULL);
strand = optionVal("strand", NULL);
bedOutput = optionExists("bedOutput");
wigOutput = optionExists("wigOutput");

if (wigOutput)
    bedOutput = FALSE;
else
    bedOutput = TRUE;

if (chr)
    verbose(2, "#\tprocessing chr: %s\n", chr);
if (strand)
    verbose(2, "#\tprocessing strand: '%s'\n", strand);
if (motif)
    verbose(2, "#\tsearching for motif: %s\n", motif);
else {
    warn("ERROR: -motif string empty, please specify a motif\n");
    usage();
}
verbose(2, "#\ttype output: %s\n", wigOutput ? "wiggle data" : "bed format");
verbose(2, "#\tspecified sequence: %s\n", argv[1]);
verbose(2, "#\tsizeof(motifVal): %d\n", (int)sizeof(motifVal));
if (strand)
    {
    if (! (sameString(strand,"+") | sameString(strand,"-")))
	{
	warn("ERROR: -strand specified ('%s') is not + or - ?\n", strand);
	usage();
	}
    /*	They are both on by default, turn off the one not specified */
    if (sameString(strand,"-"))
	doPlusStrand = FALSE;
    if (sameString(strand,"+"))
	doMinusStrand = FALSE;
    }
motifLen = strlen(motif);
/*	at two bits per character, size limit of motif is
 *	number of bits in motifVal / 2
 */
if (motifLen > (4*sizeof(motifVal))/2 )
    {
    warn("ERROR: motif string too long, limit %d\n", (4*(int)sizeof(motifVal))/2 );
    usage();
    }
cp = motif;
motifVal = 0;
complementVal = 0;
for (i = 0; i < motifLen; ++i)
    {
	switch (*cp)
	{
	case 'a':
	case 'A':
	    motifVal = (motifVal << 2) | A_BASE_VAL;
	    complementVal = (complementVal << 2) | T_BASE_VAL;
	    break;
	case 'c':
	case 'C':
	    motifVal = (motifVal << 2) | C_BASE_VAL;
	    complementVal = (complementVal << 2) | G_BASE_VAL;
	    break;
	case 'g':
	case 'G':
	    motifVal = (motifVal << 2) | G_BASE_VAL;
	    complementVal = (complementVal << 2) | C_BASE_VAL;
	    break;
	case 't':
	case 'T':
	    motifVal = (motifVal << 2) | T_BASE_VAL;
	    complementVal = (complementVal << 2) | A_BASE_VAL;
	    break;
	default:
	    warn(
		"ERROR: character in motif: '%c' is not one of ACGT\n", *cp);
	    usage();
	}
	++cp;
    }
reversed = 0;
for (i = 0; i < motifLen; ++i)
    {
    int base;
    base = complementVal & 3;
    reversed = (reversed << 2) | base;
    complementVal >>= 2;
    }
complementVal = reversed;
verbose(2, "#\tmotif numerical value: %llu (%#llx)\n", motifVal, motifVal);
verbose(2, "#\tcomplement numerical value: %llu (%#llx)\n", complementVal, complementVal);
if (motifLen < 5)
    {
    warn("ERROR: motif string must be more than 4 characters\n");
    usage();
    }

findMotif(argv[1]);
return 0;
}
Beispiel #3
0
TwoMotif findMotifInStream(SpotterParams p, Stream dataStream,
						   length_t minMotifLen, length_t maxMotifLen) {
	return findMotif(p, &dataStream.data[0], dataStream.data.size(),
					 minMotifLen, maxMotifLen);
}