示例#1
0
int
subscr (symtype a, symtype b, void *info )
{
    double* M;
    FAlphabet *alphabet;
    imbissinfo *p;
    int val;


    p = (imbissinfo*) info;
    alphabet = p->alphabet;
    M = (double*) p->sub;

    /*	ad = alphabet->mapdomain[(Uint)a];
    	bd = alphabet->mapdomain[(Uint)b];
    	printf("decoding %d and %d", ad, bd);
    	tupela = decodeCantor(NULL, ad, 2);
    	tupelb = decodeCantor(NULL, bd, 2);
    	printf("... ended ... to %d and %d \n", VECTOR(tupela,0), VECTOR(tupelb,0));
    	val =  MATRIX2D(M, 309, VECTOR(tupela, 0), VECTOR(tupelb,0));
    */
    val = MATRIX2D(M, 308, a ,b);
    if (val < -10) val = -10;

    return val;
}
double*
createsubmatrix (double *matrix, double *scr, Uint noofscores)
{
  	Uint i,j;

	for(i=0; i < noofscores; i++) {
		for (j=0; j < noofscores; j++) {
			MATRIX2D(matrix, noofscores, i, j) = 
			  MATRIX2D(matrix, noofscores, i, j) * scr[i];
			if(i==j) {
				MATRIX2D(matrix, noofscores, i, j) = scr[i];
			}
		}
	}
	
	return matrix;
}
int
main (int argc, char** argv)
{	
 	Sint optindex, c;
	unsigned char depictsw=0;
	unsigned char wurst=1;
	unsigned char gnuplot=0;

	Uint i, j, noofseqs=0, nooffreqs=0, noofqueries=0;
	Uint noofhits=100;
	Uint substrlen = 10;
	Uint minseeds = 5;
	Uint maxmatches = 10000;
	char *seq, *vec, *bin;
	imbissinfo *imbiss;
	void *space = NULL;	
	double *scores = NULL;

	int swscores[2]={3,-2};
	char *pveclistfile=NULL;
	char *alphabetfile=NULL;	
	char *inputfile=NULL;
	char *batchfile = NULL;
	char *subfile=NULL;
	char *reportfile = NULL;

	 int (*handler)
	   (void *, Matchtype *, IntSequence **, Uint, 
		Uint, void *) = allscores;
	 
	 double (*filter) 
	   (void *, Matchtype *, IntSequence *, IntSequence *,
		 Uint *, Uint, Uint, void *) = swconstfilter;

	 Matchtype* (*select)
	   (void *, Matchtype *, Uint k, 
		IntSequence *, IntSequence **, void *) = selectSW;
  

	stringset_t **fn, **freq, *queryurl, **queries=NULL;
	Suffixarray *arr = NULL;	
	IntSequence **sequences = NULL;
	IntSequence *input = NULL;
	FAlphabet *alphabet = NULL;		
    PairSint *matches=NULL;
	Uint percent=0;
	
	time_t startsuf, endsuf; 
	double difsuf, difmatch, difrank;	

#ifdef MEMMAN_H 	
	Spacetable spacetab;
	initmemoryblocks(&spacetab, 100000);
	space = &spacetab;
#endif
	
	while(1) 
	{
		c=getopt_long(argc, argv, "SAghFGBLM:D:r:m:x:n:p:b:s:a:q:l:c:dvw", 
						long_options, &optindex);
		if (c==-1) break;
		
		switch(c) {
		    case 'r':
			  reportfile=optarg;
			  break;
		    case 'v':
			  verbose_flag=1;	
			  break;
		    case 'd':
			  	depictsw = 1;
				break;
		    case 's':
				pveclistfile = optarg;	
				break;
			case 'a':
				alphabetfile = optarg;	
				break;
			case 'q':
				inputfile = optarg;
				noofqueries = 1;
				break;
			case 'l':
				substrlen = atoi(optarg);
				break;
			case 'c':
			  	minseeds = atoi(optarg);
			    break;
			case 'b':
				batchfile = optarg;
				break;
			case 'p':
				percent = atoi(optarg);
				break;
			case 'x':
				subfile = optarg;
				break;
			case 'n':
				noofhits = atoi(optarg);
				break;
			case 'w':
				wurst = 0;
				break;
			case 'B':
				filter = scorefilter;
				select = selectBlastScore;
				break;
			case 'S':
				filter = scorefilter;
				select = selectScore;
				break;
			case 'A':
				filter = swconstfilter;
				select = selectSW;
				break;
			case 'F':
				filter = scorefilter;
				select = selectScoreSWconst;
				break;
			case 'G':
				filter = scorefilter;
				select = selectBlastScoreSWconst;
				break;
			case 'M':
				swscores[0]=atoi(optarg);
				break;
			case 'L':
				handler = latexscores;
				break;
			case 'D':
				swscores[1]=atoi(optarg);
				break;
			case 'g':
				gnuplot = 1;
				break;
			case 'm':
				maxmatches=atoi(optarg);
				break;
			case 'h':
			default:
				usage(argv[0]);
				exit (EXIT_FAILURE);
		}
	}
	if (pveclistfile==NULL || (inputfile == NULL && batchfile==NULL)
		|| alphabetfile == NULL) {
		usage(argv[0]);
		exit (EXIT_FAILURE);
	}
	
	imbiss = ALLOCMEMORY(space, NULL, imbissinfo, 1);
	imbiss->reportfile = reportfile;
	imbiss->swscores = swscores;
	imbiss->noofhits = noofhits;
	imbiss->minseeds = minseeds; 	
	imbiss->wurst = wurst; 
	
	/*read batch file*/
	if (batchfile) {
		queries = readcsv(space, batchfile, "", &noofqueries);	
	}

	/*read substitution matrix*/
	if (subfile) {
		freq=readcsv(space, subfile,",", &nooffreqs);
		scores = ALLOCMEMORY(space, NULL, double, ((nooffreqs-1)*(nooffreqs-1)) );
		for(i=1; i < nooffreqs; i++) {
			for(j=1; j < nooffreqs; j++) {
				if(strcmp(SETSTR(freq[i],j),"inf")==0){
					MATRIX2D(scores, nooffreqs-1, i, j)=0;
				}else{
					MATRIX2D(scores, nooffreqs-1, i, j)=atof(SETSTR(freq[i],j));
				}
			}
		}
	}
	
	/*read alphabet*/	
	if (alphabetfile != NULL) {
		alphabet = loadCSValphabet(space, alphabetfile);
		sortMapdomain(space, alphabet);
    }

	
	/*load sequence database*/
	fn=readcsv(space, pveclistfile, "", &noofseqs);
	sequences = ALLOCMEMORY(space, NULL, IntSequence *, noofseqs);
	for(i=0; i < noofseqs; i++) 
	{		  
		sequences[i] = loadSequence(space, SETSTR(fn[i],0));		
	}

	for (i=0; i < noofseqs; i++) {	
	  	destructStringset(space, fn[i]);
	}
	FREEMEMORY(space, fn);
	
	
	/*construct the suffix array*/
	time (&startsuf);
	arr = constructSufArr(space, sequences, noofseqs, NULL);
        constructLcp(space, arr); 	
   	time (&endsuf);
	difsuf = difftime (endsuf, startsuf);


	/*do search*/
    for (i=0; i < noofqueries; i++) {
	  
	    /*get query form batchfile*/
	  	if (queries) {
			inputfile = SETSTR(queries[i],0);
		}	
		
		/*typically only used with batchfile*/
		if (percent != 0) {
			substrlen = 
			  ((double)((double)input->length/100)*(double) percent);
		}
			
		input = loadSequence(space, inputfile);
		//seq = printSequence (space, input, 60); 
		printf(">IMBISS order delivered\n");	
		//printf("%s\n",seq);
		printf("%s\n", input->url); 
		//FREEMEMORY(space, seq);	
		
		time (&startsuf);
		matches=sufSubstring(space, arr, input->sequence, input->length, substrlen);	 
		time (&endsuf);
		difmatch = difftime (endsuf, startsuf);

		/*get prob vector url for salami/wurst*/
        //printf("%.*s\n", 5, input->url + 58);
        vec = malloc(sizeof(char)*66);
        sprintf(vec, "/smallfiles/public/no_backup/bm/pdb_all_vec_6mer_struct/%5s.vec\0", input->url+56);
        bin = malloc(sizeof(char)*54);
        sprintf(bin, "/smallfiles/public/no_backup/bm/pdb_all_bin/%5s.bin\0", input->url+56);

		queryurl = initStringset(space);
		addString(space, queryurl, bin, strlen(bin));
		addString(space, queryurl, vec, strlen(vec));

		
        getimbissblast(space, input, sequences, noofseqs, alphabet, imbiss);
	

		imbiss->query = queryurl;
		imbiss->substrlen = substrlen;
		imbiss->alphabet = alphabet;
		
		/*if a substition file was given ...*/
		if (subfile) {
			imbiss->sub = createsubmatrix(scores, imbiss->score, nooffreqs-1);
		}
		
		/*match 'n' report*/
		time (&startsuf);
		
		imbiss->consensus = ALLOCMEMORY(space, NULL, Uint, (input->length-substrlen));
		memset(imbiss->consensus, 0, (sizeof(Uint)*(input->length-substrlen)));
	
		rankSufmatch(space, arr, matches, input->length-substrlen,
			maxmatches, substrlen, 
			sequences, noofseqs, filter, select, handler,
			input, imbiss, scores, depictsw);
		
		if (gnuplot) {	
			consensus (space, imbiss->consensus, input->length-substrlen, 
				input, substrlen, imbiss);
		}

		time (&endsuf);
		difrank = difftime (endsuf, startsuf);
	
		printf ("Building  the suffixtree has taken %f seconds.\n", difsuf);
		printf ("Match the suffixtree has taken %f seconds.\n", difmatch);
    		printf ("Rank  the suffixtree has taken %f seconds.\n", difrank);
	
		/*partial cleanup*/
		//destructStringset(space, queryurl);
		destructSequence(space, input);
		if(subfile) {
			FREEMEMORY(space, imbiss->sub);
		}

		FREEMEMORY(space, imbiss->consensus);
		FREEMEMORY(space, imbiss->score);
		FREEMEMORY(space, matches);
        free(bin);
        free(vec);
	}
	
	/*final cleanup*/
	for (i=0; i < noofseqs; i++) {
		destructSequence(space, sequences[i]);
	}
    FREEMEMORY(space, sequences);	
	destructSufArr(space, arr);
	
#ifdef MEMMAN_H
	activeblocks(space);
#endif
	
	printf("Goodbye.\n");	
	return EXIT_SUCCESS;
}