int subscr (symtype a, symtype b, void *info ) { double* M; FAlphabet *alphabet; imbissinfo *p; int val; p = (imbissinfo*) info; alphabet = p->alphabet; M = (double*) p->sub; /* ad = alphabet->mapdomain[(Uint)a]; bd = alphabet->mapdomain[(Uint)b]; printf("decoding %d and %d", ad, bd); tupela = decodeCantor(NULL, ad, 2); tupelb = decodeCantor(NULL, bd, 2); printf("... ended ... to %d and %d \n", VECTOR(tupela,0), VECTOR(tupelb,0)); val = MATRIX2D(M, 309, VECTOR(tupela, 0), VECTOR(tupelb,0)); */ val = MATRIX2D(M, 308, a ,b); if (val < -10) val = -10; return val; }
double* createsubmatrix (double *matrix, double *scr, Uint noofscores) { Uint i,j; for(i=0; i < noofscores; i++) { for (j=0; j < noofscores; j++) { MATRIX2D(matrix, noofscores, i, j) = MATRIX2D(matrix, noofscores, i, j) * scr[i]; if(i==j) { MATRIX2D(matrix, noofscores, i, j) = scr[i]; } } } return matrix; }
int main (int argc, char** argv) { Sint optindex, c; unsigned char depictsw=0; unsigned char wurst=1; unsigned char gnuplot=0; Uint i, j, noofseqs=0, nooffreqs=0, noofqueries=0; Uint noofhits=100; Uint substrlen = 10; Uint minseeds = 5; Uint maxmatches = 10000; char *seq, *vec, *bin; imbissinfo *imbiss; void *space = NULL; double *scores = NULL; int swscores[2]={3,-2}; char *pveclistfile=NULL; char *alphabetfile=NULL; char *inputfile=NULL; char *batchfile = NULL; char *subfile=NULL; char *reportfile = NULL; int (*handler) (void *, Matchtype *, IntSequence **, Uint, Uint, void *) = allscores; double (*filter) (void *, Matchtype *, IntSequence *, IntSequence *, Uint *, Uint, Uint, void *) = swconstfilter; Matchtype* (*select) (void *, Matchtype *, Uint k, IntSequence *, IntSequence **, void *) = selectSW; stringset_t **fn, **freq, *queryurl, **queries=NULL; Suffixarray *arr = NULL; IntSequence **sequences = NULL; IntSequence *input = NULL; FAlphabet *alphabet = NULL; PairSint *matches=NULL; Uint percent=0; time_t startsuf, endsuf; double difsuf, difmatch, difrank; #ifdef MEMMAN_H Spacetable spacetab; initmemoryblocks(&spacetab, 100000); space = &spacetab; #endif while(1) { c=getopt_long(argc, argv, "SAghFGBLM:D:r:m:x:n:p:b:s:a:q:l:c:dvw", long_options, &optindex); if (c==-1) break; switch(c) { case 'r': reportfile=optarg; break; case 'v': verbose_flag=1; break; case 'd': depictsw = 1; break; case 's': pveclistfile = optarg; break; case 'a': alphabetfile = optarg; break; case 'q': inputfile = optarg; noofqueries = 1; break; case 'l': substrlen = atoi(optarg); break; case 'c': minseeds = atoi(optarg); break; case 'b': batchfile = optarg; break; case 'p': percent = atoi(optarg); break; case 'x': subfile = optarg; break; case 'n': noofhits = atoi(optarg); break; case 'w': wurst = 0; break; case 'B': filter = scorefilter; select = selectBlastScore; break; case 'S': filter = scorefilter; select = selectScore; break; case 'A': filter = swconstfilter; select = selectSW; break; case 'F': filter = scorefilter; select = selectScoreSWconst; break; case 'G': filter = scorefilter; select = selectBlastScoreSWconst; break; case 'M': swscores[0]=atoi(optarg); break; case 'L': handler = latexscores; break; case 'D': swscores[1]=atoi(optarg); break; case 'g': gnuplot = 1; break; case 'm': maxmatches=atoi(optarg); break; case 'h': default: usage(argv[0]); exit (EXIT_FAILURE); } } if (pveclistfile==NULL || (inputfile == NULL && batchfile==NULL) || alphabetfile == NULL) { usage(argv[0]); exit (EXIT_FAILURE); } imbiss = ALLOCMEMORY(space, NULL, imbissinfo, 1); imbiss->reportfile = reportfile; imbiss->swscores = swscores; imbiss->noofhits = noofhits; imbiss->minseeds = minseeds; imbiss->wurst = wurst; /*read batch file*/ if (batchfile) { queries = readcsv(space, batchfile, "", &noofqueries); } /*read substitution matrix*/ if (subfile) { freq=readcsv(space, subfile,",", &nooffreqs); scores = ALLOCMEMORY(space, NULL, double, ((nooffreqs-1)*(nooffreqs-1)) ); for(i=1; i < nooffreqs; i++) { for(j=1; j < nooffreqs; j++) { if(strcmp(SETSTR(freq[i],j),"inf")==0){ MATRIX2D(scores, nooffreqs-1, i, j)=0; }else{ MATRIX2D(scores, nooffreqs-1, i, j)=atof(SETSTR(freq[i],j)); } } } } /*read alphabet*/ if (alphabetfile != NULL) { alphabet = loadCSValphabet(space, alphabetfile); sortMapdomain(space, alphabet); } /*load sequence database*/ fn=readcsv(space, pveclistfile, "", &noofseqs); sequences = ALLOCMEMORY(space, NULL, IntSequence *, noofseqs); for(i=0; i < noofseqs; i++) { sequences[i] = loadSequence(space, SETSTR(fn[i],0)); } for (i=0; i < noofseqs; i++) { destructStringset(space, fn[i]); } FREEMEMORY(space, fn); /*construct the suffix array*/ time (&startsuf); arr = constructSufArr(space, sequences, noofseqs, NULL); constructLcp(space, arr); time (&endsuf); difsuf = difftime (endsuf, startsuf); /*do search*/ for (i=0; i < noofqueries; i++) { /*get query form batchfile*/ if (queries) { inputfile = SETSTR(queries[i],0); } /*typically only used with batchfile*/ if (percent != 0) { substrlen = ((double)((double)input->length/100)*(double) percent); } input = loadSequence(space, inputfile); //seq = printSequence (space, input, 60); printf(">IMBISS order delivered\n"); //printf("%s\n",seq); printf("%s\n", input->url); //FREEMEMORY(space, seq); time (&startsuf); matches=sufSubstring(space, arr, input->sequence, input->length, substrlen); time (&endsuf); difmatch = difftime (endsuf, startsuf); /*get prob vector url for salami/wurst*/ //printf("%.*s\n", 5, input->url + 58); vec = malloc(sizeof(char)*66); sprintf(vec, "/smallfiles/public/no_backup/bm/pdb_all_vec_6mer_struct/%5s.vec\0", input->url+56); bin = malloc(sizeof(char)*54); sprintf(bin, "/smallfiles/public/no_backup/bm/pdb_all_bin/%5s.bin\0", input->url+56); queryurl = initStringset(space); addString(space, queryurl, bin, strlen(bin)); addString(space, queryurl, vec, strlen(vec)); getimbissblast(space, input, sequences, noofseqs, alphabet, imbiss); imbiss->query = queryurl; imbiss->substrlen = substrlen; imbiss->alphabet = alphabet; /*if a substition file was given ...*/ if (subfile) { imbiss->sub = createsubmatrix(scores, imbiss->score, nooffreqs-1); } /*match 'n' report*/ time (&startsuf); imbiss->consensus = ALLOCMEMORY(space, NULL, Uint, (input->length-substrlen)); memset(imbiss->consensus, 0, (sizeof(Uint)*(input->length-substrlen))); rankSufmatch(space, arr, matches, input->length-substrlen, maxmatches, substrlen, sequences, noofseqs, filter, select, handler, input, imbiss, scores, depictsw); if (gnuplot) { consensus (space, imbiss->consensus, input->length-substrlen, input, substrlen, imbiss); } time (&endsuf); difrank = difftime (endsuf, startsuf); printf ("Building the suffixtree has taken %f seconds.\n", difsuf); printf ("Match the suffixtree has taken %f seconds.\n", difmatch); printf ("Rank the suffixtree has taken %f seconds.\n", difrank); /*partial cleanup*/ //destructStringset(space, queryurl); destructSequence(space, input); if(subfile) { FREEMEMORY(space, imbiss->sub); } FREEMEMORY(space, imbiss->consensus); FREEMEMORY(space, imbiss->score); FREEMEMORY(space, matches); free(bin); free(vec); } /*final cleanup*/ for (i=0; i < noofseqs; i++) { destructSequence(space, sequences[i]); } FREEMEMORY(space, sequences); destructSufArr(space, arr); #ifdef MEMMAN_H activeblocks(space); #endif printf("Goodbye.\n"); return EXIT_SUCCESS; }