/* Run findMotif() over a stream obtained from a directory.
 *
 * Calls readStreamFromDir(streamDir, 1) and hands the resulting stream's
 * data buffer and element count to findMotif(), together with the spotter
 * parameters and the motif length bounds, returning whatever findMotif()
 * returns.
 *
 * NOTE(review): element 0 of the data buffer is addressed unconditionally,
 * so this presumably expects a non-empty stream -- confirm against
 * readStreamFromDir(). */
TwoMotif findMotifInStream(SpotterParams p, std::string streamDir, length_t minMotifLen, length_t maxMotifLen)
{
    int dimCount = 1;
    Stream loaded = readStreamFromDir(streamDir, dimCount);

    auto *samples = &loaded.data[0];
    const auto sampleCount = loaded.data.size();

    return findMotif(p, samples, sampleCount, minMotifLen, maxMotifLen);
}
int main(int argc, char *argv[]) /* Process command line. */ { int i; char *cp; unsigned long long reversed; size_t maxAlloc; char asciiAlloc[32]; optionInit(&argc, argv, options); if (argc < 2) usage(); maxAlloc = 2100000000 * (((sizeof(size_t)/4)*(sizeof(size_t)/4)*(sizeof(size_t)/4))); sprintLongWithCommas(asciiAlloc, (long long) maxAlloc); verbose(4, "#\tmaxAlloc: %s\n", asciiAlloc); setMaxAlloc(maxAlloc); /* produces: size_t is 4 == 2100000000 ~= 2^31 = 2Gb * size_t is 8 = 16800000000 ~= 2^34 = 16 Gb */ dnaUtilOpen(); motif = optionVal("motif", NULL); chr = optionVal("chr", NULL); strand = optionVal("strand", NULL); bedOutput = optionExists("bedOutput"); wigOutput = optionExists("wigOutput"); if (wigOutput) bedOutput = FALSE; else bedOutput = TRUE; if (chr) verbose(2, "#\tprocessing chr: %s\n", chr); if (strand) verbose(2, "#\tprocessing strand: '%s'\n", strand); if (motif) verbose(2, "#\tsearching for motif: %s\n", motif); else { warn("ERROR: -motif string empty, please specify a motif\n"); usage(); } verbose(2, "#\ttype output: %s\n", wigOutput ? "wiggle data" : "bed format"); verbose(2, "#\tspecified sequence: %s\n", argv[1]); verbose(2, "#\tsizeof(motifVal): %d\n", (int)sizeof(motifVal)); if (strand) { if (! 
(sameString(strand,"+") | sameString(strand,"-"))) { warn("ERROR: -strand specified ('%s') is not + or - ?\n", strand); usage(); } /* They are both on by default, turn off the one not specified */ if (sameString(strand,"-")) doPlusStrand = FALSE; if (sameString(strand,"+")) doMinusStrand = FALSE; } motifLen = strlen(motif); /* at two bits per character, size limit of motif is * number of bits in motifVal / 2 */ if (motifLen > (4*sizeof(motifVal))/2 ) { warn("ERROR: motif string too long, limit %d\n", (4*(int)sizeof(motifVal))/2 ); usage(); } cp = motif; motifVal = 0; complementVal = 0; for (i = 0; i < motifLen; ++i) { switch (*cp) { case 'a': case 'A': motifVal = (motifVal << 2) | A_BASE_VAL; complementVal = (complementVal << 2) | T_BASE_VAL; break; case 'c': case 'C': motifVal = (motifVal << 2) | C_BASE_VAL; complementVal = (complementVal << 2) | G_BASE_VAL; break; case 'g': case 'G': motifVal = (motifVal << 2) | G_BASE_VAL; complementVal = (complementVal << 2) | C_BASE_VAL; break; case 't': case 'T': motifVal = (motifVal << 2) | T_BASE_VAL; complementVal = (complementVal << 2) | A_BASE_VAL; break; default: warn( "ERROR: character in motif: '%c' is not one of ACGT\n", *cp); usage(); } ++cp; } reversed = 0; for (i = 0; i < motifLen; ++i) { int base; base = complementVal & 3; reversed = (reversed << 2) | base; complementVal >>= 2; } complementVal = reversed; verbose(2, "#\tmotif numerical value: %llu (%#llx)\n", motifVal, motifVal); verbose(2, "#\tcomplement numerical value: %llu (%#llx)\n", complementVal, complementVal); if (motifLen < 5) { warn("ERROR: motif string must be more than 4 characters\n"); usage(); } findMotif(argv[1]); return 0; }
/* Run findMotif() over an already-loaded stream.
 *
 * Forwards the stream's data buffer and element count to findMotif(),
 * together with the spotter parameters and the motif length bounds, and
 * returns whatever findMotif() returns.
 *
 * NOTE(review): element 0 of the data buffer is addressed unconditionally,
 * so this presumably expects a non-empty stream -- confirm with callers. */
TwoMotif findMotifInStream(SpotterParams p, Stream dataStream, length_t minMotifLen, length_t maxMotifLen)
{
    auto *samples = &dataStream.data[0];
    const auto sampleCount = dataStream.data.size();

    return findMotif(p, samples, sampleCount, minMotifLen, maxMotifLen);
}