Example #1
0
/*
 * kmismatch
 *
 * For every read in `reads`, search the read and its DNA complement
 * (as produced by charDNAcomplement) with kmis() using parameter `k`,
 * convert any hits to gmatch_t records and hand them to reportMatch()
 * on `dev`.
 *
 * space     memory manager handle passed through to the allocating helpers
 * s         suffix array that is searched; s->seq is used to set up the map
 * reads     the reads to be matched
 * k         passed unchanged to kmis()
 * counter   if NULL a progress bar is drawn; otherwise *counter is
 *           incremented once per read and &mutex2 is given to reportMatch()
 * rep_type  passed unchanged to reportMatch()
 * silent    if non-zero neither the progress bar nor *counter is updated
 * dev       output stream handed to reportMatch()
 */
void
kmismatch(void *space,
    Suffixarray *s,
    fasta_t *reads,
    Uint k,
    Uint* counter,
    Uint rep_type,
    unsigned char silent,
    FILE *dev)
{
  Uint idx, seqlen;
  char *complement, *seq;
  branch_t *hits;
  Gmap map;
  Uint nhits = 0;
  gread_t read;
  Container matches;
  pthread_mutex_t *lock = NULL;

  /* a caller-supplied counter selects the mutex-protected reporting path */
  if (counter != NULL) {
    lock = &mutex2;
  } else {
    initProgressBarVT();
  }

  initGmap(&map, s->seq, 1);

  for (idx = 0; idx < reads->noofseqs; idx++) {

    nhits = 0;
    initRead(&read, reads->seqs[idx]);
    setReads(&map, &read, 1);

    if (!silent) {
      if (lock != NULL) {
        (*counter)++;
      } else {
        progressBarVT("reads matched.", reads->noofseqs, idx, 25);
      }
    }

    seq = reads->seqs[idx]->sequence;
    seqlen = reads->seqs[idx]->length;

    /* search the read as given */
    hits = kmis(space, s, seq, seqlen, k, &nhits);

    if (nhits != 0) {
      bl_containerInit(&matches, 100, sizeof(gmatch_t));
      branch2match(s, &matches, hits, nhits);
      setMatches(&read, (gmatch_t*)matches.contspace,
                 bl_containerSize(&matches), PLUSSTRAND);

      reportMatch(dev, &map, rep_type, 0, lock, seqlen, seqlen);
      bl_containerDestruct(&matches, NULL);
      FREEMEMORY(space, hits);
    }

    /* start from a fresh read record before searching the complement */
    initRead(&read, reads->seqs[idx]);
    setReads(&map, &read, 1);

    complement = charDNAcomplement(space, seq, seqlen);
    /* nhits is not cleared here; it holds whatever kmis() stores in it */
    hits = kmis(space, s, complement, seqlen, k, &nhits);

    if (nhits != 0) {
      bl_containerInit(&matches, 100, sizeof(gmatch_t));
      branch2match(s, &matches, hits, nhits);
      setMatches(&read, (gmatch_t*)matches.contspace,
                 bl_containerSize(&matches), MINUSSTRAND);
      reportMatch(dev, &map, rep_type, 0, lock, seqlen, seqlen);
      bl_containerDestruct(&matches, NULL);
      FREEMEMORY(space, hits);
    }
    FREEMEMORY(space, complement);
  }
}
 /*
  * main
  *
  * Converts the probability vectors named in a list file into encoded
  * sequences and saves one sequence file per vector.
  *
  * Options (both -f and -a are mandatory, otherwise usage() is printed
  * and the program exits with EXIT_FAILURE):
  *   -f <file>   list of probability vector files
  *   -a <file>   alphabet file, loaded with loadCSValphabet()
  *   -o <path>   optional prefix for the written files; it is joined
  *               directly with the file name, no separator is inserted
  *
  * Returns EXIT_SUCCESS after all vectors were converted. Exits with
  * EXIT_FAILURE if a probability vector cannot be read.
  */
 int 
 main(int argc, char** argv) 
 {
	Uint noofvecs, i;
	Sint optindex, c;
	vector_t info;
#ifdef MEMMAN_H	
	Spacetable spacetab;
#endif
	void *space = NULL;
	char *url = NULL;
	char *outpath = NULL;
	char *pveclistfile = NULL;
	char *alphabetfile = NULL;
	char *vecext="vec";
	char *seqext="seq";	
	struct prob_vec *p_vec;
	IntSequence *sequence;
	FAlphabet *alphabet;	
	stringset_t *tok;
	stringset_t **fn;
	
#ifdef MEMMAN_H 
	initmemoryblocks(&spacetab, 1000);
	space = &spacetab;
#endif

	while(1) {
		c=getopt_long(argc, argv, "f:a:o:", long_options, &optindex);
		if (c==-1) break;
		
		switch(c) {
			case 'f':
				pveclistfile = optarg;	
				break;
			case 'a':
				alphabetfile = optarg;	
				break;
			case 'o':
				outpath = optarg;
				break;
			default:
				usage(argv[0]);
				exit (EXIT_FAILURE);
		}

	}

	if (pveclistfile==NULL || alphabetfile == NULL) {
		usage(argv[0]);
		exit (EXIT_FAILURE);
	}
	
	/* one stringset per list entry; element 0 is used as the base file name */
	fn=readcsv(space, pveclistfile, ".", &noofvecs);
	alphabet = loadCSValphabet(space, alphabetfile);
	sortMapdomain(space, alphabet);
   
	for(i=0; i<noofvecs; i++) 
	{	
	  
		INITVECTOR(&info);	
		/*
		 * NOTE(review): SETSTRLEN(fn[i],0) is not updated after appending
		 * the vector extension; the code below appears to rely on it still
		 * holding the length of the base name - confirm.
		 */
	  	SETSTR(fn[i],0) = concatdelim(space, SETSTR(fn[i],0), vecext,
							  SETSTRLEN(fn[i],0), 3, '.');
	   
		p_vec = prob_vec_read (SETSTR(fn[i],0));	

		/* do not dereference a failed read; stop like the other error paths */
		if (p_vec == NULL) {
			fprintf(stderr, "%s: could not read probability vector '%s'\n",
					argv[0], SETSTR(fn[i],0));
			exit (EXIT_FAILURE);
		}
			
		if (p_vec->mship == NULL) 
			prob_vec_expand(p_vec);
			
		sequence = encode_prob_vec(space, alphabet, p_vec, 0, 0, 
									cantorchar, &info);
		sequence->info = (Uint*) info.elements;
		sequence->namelen= strlen(alphabetfile);
		
		COPYSTR(space, sequence->alphabetname, alphabetfile, 
							 strlen(alphabetfile));

		/*
		 * this is a potential security risk: compnd_len comes from the
		 * vector that was just read and is used as the copy length
		 */
		if (p_vec->compnd_len > 0) {
		  sequence->descrlen = p_vec->compnd_len-1; 
		  COPYSTR(space, sequence->description, p_vec->compnd, 
							 p_vec->compnd_len-1);
		} else {
		  sequence->descrlen = 14;			
		  COPYSTR(space, sequence->description, "descriptor n/a", 14);
		}

		sequence->urllen = SETSTRLEN(fn[i],0);
		COPYSTR(space, sequence->url, SETSTR(fn[i],0), 
						   SETSTRLEN(fn[i],0));
				
		/* swap in the sequence extension and account for '.' + 3 characters */
		SETSTR(fn[i],0) = concatdelim(space, SETSTR(fn[i],0), seqext,
							SETSTRLEN(fn[i],0), 3, '.');	
		SETSTRLEN(fn[i],0) += 4;
		
		if (outpath) {
			/* keep only the last '/'-separated token, i.e. the file name */
			tok = tokensToStringset(space, "/", SETSTR(fn[i],0), 
					SETSTRLEN(fn[i],0));
			
			COPYSTR(space, url, outpath, strlen(outpath));
	
			url = concat(space, url, SETSTR(tok, tok->noofstrings-1), 
				strlen(url), SETSTRLEN(tok, tok->noofstrings-1));
		
			saveSequence(sequence, url);
			
			destructStringset(space, tok);
			FREEMEMORY(space, url);
			url = NULL;
		
		} else {
	
			saveSequence(sequence, SETSTR(fn[i],0));
		}
		
		destructSequence (space, sequence);		
		prob_vec_destroy (p_vec);			 
		destructStringset (space, fn[i]);
		progressBarVT("probability vectors converted", noofvecs-1, i, 25);	
	}
	
	printf("\nexit.\n");
	FREEMEMORY(space, fn);
	destructAlphabet(space, alphabet);
		
	return EXIT_SUCCESS;
 }
Example #3
0
/*
 * matchkdseed
 *
 * For every read of at least `minsize` characters, computes seeds with
 * kdseeds() for the read and for its complement (charDNAcomplement),
 * aligns them with alignkdmatches() and reports the result with
 * reportMatch() on `dev`. If `matchingstat` is set, the seeds are printed
 * with dumpkdseeds() instead and nothing is aligned or reported.
 * Reads for which neither strand produced a match are written to
 * `nomatchdev` when it is not NULL; this includes reads shorter than
 * `minsize` and all reads in matchingstat mode, because the match
 * counters stay 0 in those cases.
 *
 * space          memory manager handle passed to the allocating helpers
 * s              suffix array that is searched
 * reads          reads to be matched; reads->maxlen sizes the bit matrix
 * minsize        reads shorter than this are not searched
 * outfile        not used in this function
 * counter        if NULL a progress bar is drawn; otherwise *counter is
 *                incremented per read and mutex1/mutex2 guard the output
 * silent         if non-zero no progress bar / counter update happens
 * s_ext, p_mis,
 * Xoff, k_p      passed unchanged to kdseeds()
 * rep_type,
 * showalignment  passed unchanged to reportMatch()
 * hitstrategy    2: best and beststrand set, 1: beststrand only
 * bedist,
 * maxevalue, M   passed unchanged to alignkdmatches()
 * acc            percentage used to derive the matrix dimension and the
 *                score limits given to reportMatch()
 * matchingstat   if non-zero, dump seeds instead of aligning
 * dev            output stream for reported matches
 * nomatchdev     optional output stream for reads without matches
 */
void
matchkdseed( void *space, 
            Suffixarray *s, 
            fasta_t *reads, 
            Uint minsize,
            char *outfile,
            Uint *counter,
            unsigned char silent,
            Uint s_ext,
            Uint p_mis,
            Uint Xoff,
            Uint k_p,
            Uint rep_type,
            Uint hitstrategy,
            Uint bedist,
            unsigned char showalignment,
            double maxevalue,
            int acc,      
            Uint M,
            unsigned char matchingstat,
            FILE *dev,
            FILE *nomatchdev) {

  double   H,
           K,
           lambda;
  char     *buffer, 
           *curseq;
  Uint     k,
           curlen,
           dim,
           wordno;
  bitvector *D,
            *Mv;
  Gmap      map;
  gread_t   read;
  gmatch_t  *mmatches=NULL,
            *pmatches=NULL;
  matchstem_t *V;

  int plusdiff, minusdiff, noofmatches=0;


  
  pthread_mutex_t *mtx=NULL;
  pthread_mutex_t *mtx2=NULL;
  Uint *enctab, i,j, nmmatch, npmatch;

  int bmscr, bpscr;
  unsigned char uninformative = 0,
                beststrand = 0,
                best = 0;
    
  /*translate the hit strategy into the two flags used below*/
  if (hitstrategy == 2) { 
    beststrand = 1;
    best = 1;
  } else if (hitstrategy == 1) {
    beststrand = 1;
  }


  /*build alignment matrix*/
  enctab = encodetab(s->seq->map, s->seq->mapsize);
  /*
   * NOTE(review): acc is an int, so (acc*reads->maxlen)/100 is an integer
   * division if reads->maxlen is an integer type and ceil() then has no
   * effect - confirm this is intended.
   */
  dim = reads->maxlen + 2*((reads->maxlen-ceil((acc*reads->maxlen)/100))+4);
  wordno = reads->maxlen/BITVECTOR_WORDSIZE;
//  wordno += ((reads->maxlen & (BITVECTOR_WORDSIZE-1)) > 0) ? 1 : 0;
  /*always add one word instead of the conditional rounding above*/
  wordno++;

  /*one allocation holds both vectors: D is the first half, Mv the second*/
  D = ALLOCMEMORY(space, NULL, bitvector, 2*(dim+1));
  Mv = &D[dim+1];

  for(i=0; i <= dim; i++) {
    D[i]  = initbitvector(space, wordno*BITVECTOR_WORDSIZE);
    Mv[i]  = initbitvector(space, wordno*BITVECTOR_WORDSIZE);
  }  

  /*a caller-supplied counter selects the mutex-protected output path*/
  if (counter == NULL) {
    initProgressBarVT();
  } else {
    mtx = &mutex1;
    mtx2 = &mutex2;
  }

  karlinunitcostpp(space, &lambda, &H, &K);
  
  for (k=0; k < reads->noofseqs; k++) {
    plusdiff = 0;
    minusdiff = 0;
    noofmatches = 0;

    if (!silent) {
      if (counter == NULL) {
        progressBarVT("reads matched.", reads->noofseqs, k, 25);
      } else {
        (*counter)++;
      }
    }

    curseq = reads->seqs[k]->sequence;
    curlen = reads->seqs[k]->length; 
    /*stay 0 unless alignkdmatches() below stores a match count*/
    npmatch = 0;
    nmmatch = 0;

    if(curlen >= minsize) {  
      initGmap(&map, s->seq, 1);
      initRead(&read, reads->seqs[k]);
      bpscr = 0;
      bmscr = 0;
      
      /*search the read as given*/
      V=kdseeds(space, s, curseq, curlen, s_ext, p_mis, Xoff, k_p);
#ifdef KDUNINFORMATIVE
      /*the trailing else below binds to the if(!matchingstat) that follows*/
      if(V[0].branches[0].r > V[0].branches[0].l && V[0].branches[0].r-V[0].branches[0].l > M) {
        uninformative = 1;
        plusdiff = V[0].branches[0].r - V[0].branches[0].l;
      } else 
#endif
      if(!matchingstat) {
        uninformative = 0;
        pmatches = alignkdmatches(space, s, V, reads->seqs[k], curseq, curlen, M, enctab, beststrand, 
            bedist, lambda, H, K, maxevalue, acc, D, dim, &npmatch, &bpscr);
 
        if(npmatch > 0) {
            setMatches(&read, pmatches, npmatch, PLUSSTRAND);
        }

      } else {
        /*NOTE(review): curlen is declared Uint but printed with %d*/
        printf("#%d %s\n",curlen, reads->seqs[k]->description);
        dumpkdseeds(s, V, curlen, '+', M);
      }

      /*release the per-position branch arrays, then the seed array itself*/
      for(j=0; j < curlen; j++) {
        if (V[j].noofbranches > 0) {
            FREEMEMORY(space, V[j].branches);
        }
      }
      FREEMEMORY(space, V);

      /*search the complement*/
      buffer = charDNAcomplement(space, curseq, curlen);
      V=kdseeds(space, s, buffer, curlen, s_ext, p_mis, Xoff, k_p);
#ifdef KDUNINFORMATIVE
      /*taken only if the first search already set uninformative*/
      if(uninformative && V[0].branches[0].r > V[0].branches[0].l && V[0].branches[0].r-V[0].branches[0].l > M) {
        minusdiff = V[0].branches[0].r - V[0].branches[0].l;
        uninformative = 1;
      } else
#endif
      if(!matchingstat) {
        uninformative = 0;
        mmatches = alignkdmatches(space, s, V, reads->seqs[k], buffer, curlen, M, enctab, beststrand, 
            bedist, lambda, H, K, maxevalue, acc, D, dim, &nmmatch, &bmscr);

        if(nmmatch > 0) {  
          setMatches(&read, mmatches, nmmatch, MINUSSTRAND);
        }

        /*
         * choose the two score limits handed to reportMatch():
         * best         -> both become the smaller of the two scores
         * !beststrand  -> both are derived from acc and the read length
         * otherwise    -> each score is passed on unchanged
         */
        if (best) {
          bpscr = MIN(bmscr, bpscr);
          bmscr = bpscr;
        } else if (!beststrand) {
          bpscr = bmscr = curlen-ceil((acc*curlen)/100);
        }

        /*matches of both searches are reported together*/
        setReads(&map, &read, 1);
        reportMatch(dev, &map, rep_type, showalignment, mtx, bpscr, bmscr);
       
        FREEMEMORY(space, pmatches);
        FREEMEMORY(space, mmatches);
        pmatches = NULL;
        mmatches = NULL;

      } else {
        dumpkdseeds(s, V, curlen, '-', M); 
      }

      /*release the seeds of the complement search and the complement itself*/
      for(j=0; j < curlen; j++) {
        if (V[j].noofbranches > 0) {
          FREEMEMORY(space, V[j].branches);
        }
      }
      FREEMEMORY(space, V);
      FREEMEMORY(space, buffer);  
    }
      
    /*write reads without any match; locked with mtx2 when it is set*/
    if(nomatchdev && nmmatch == 0 && npmatch == 0) {   
        if (mtx2 != NULL) pthread_mutex_lock(mtx2);
        fprintf(nomatchdev, "%s\n%s\n", reads->seqs[k]->description, reads->seqs[k]->sequence);
        fflush(nomatchdev);
        if (mtx2 != NULL) pthread_mutex_unlock(mtx2);
    }
  }
  /*D and Mv share one block, so both halves are handed over at once*/
  wrapBitmatrix(space, D, 2*(dim+1));
  FREEMEMORY(space, D);
  FREEMEMORY(space, enctab);
  return;
}