예제 #1
0
파일: sufmatch.c 프로젝트: spundhir/RNA-Seq
/*
 * Match callback: records a hit at position pos in m and adds up the
 * per-symbol scores of the len symbols starting at ptr.
 *
 * space - handle forwarded to ALLOCMEMORY
 * m     - match record; count, pos, org, score and blast are updated
 * a, b  - unused here
 * ptr   - first of len symbols, each used as an index into the score table
 * pos   - position stored for this hit
 * info  - points to an imbissinfo providing the score table and the
 *         consensus counters
 *
 * Returns the score sum of this hit, or 0 if the sum is not positive.
 */
double
scorefilter (void *space, Matchtype *m, IntSequence *a, IntSequence *b,
             Uint *ptr, Uint len, Uint pos, void *info) {
    imbissinfo *stats = (imbissinfo*) info;
    Uint *cur = ptr;
    Uint remaining;
    double symscore = 0;
    double total = 0;

    /* grow both position arrays by one slot and store the new hit */
    m->count++;
    m->pos = ALLOCMEMORY(space, m->pos, Uint, m->count);
    m->org = ALLOCMEMORY(space, m->org, Uint, m->count);
    m->pos[(m->count)-1] = pos;
    m->org[(m->count)-1] = pos;

    /* accumulate the hit score locally and in the running match score */
    for (remaining = len; remaining > 0; remaining--, cur++) {
        symscore = stats->score[(Uint)*cur];
        total += symscore;
        m->score += symscore;
    }

    /* remember the best single-hit score seen so far */
    if (!(m->blast > total)) {
        m->blast = total;
    }

    /* one more hit covering this position */
    stats->consensus[pos] += (Uint) 1;

    if (total > 0) {
        return total;
    }
    return 0;
}
예제 #2
0
/*
 * Builds a suffix array over the concatenation of len integer sequences.
 *
 * space    - handle forwarded to the allocation helpers
 * s        - array of len sequences
 * len      - number of sequences in s
 * alphabet - unused here
 *
 * Returns a newly allocated Suffixarray with seq, numofsuffixes, suffixptr,
 * suftab and inv_suftab filled in; the other fields are not touched.
 */
Suffixarray*
constructSufArr(void *space, IntSequence **s, Uint len, FAlphabet* alphabet)
{
	MultiIntSeq *multi;
	Suffixarray *result;
	Uint **starts;
	Uint *order;
	Uint *rank;
	Uint total;
	Uint pos;

	/* 4000 and 4001 are the two marker symbols handed to the helpers */
	multi = concatIntSequences(space, s, len, 4000, 4001);
	total = (multi->totallength - multi->numofsequences)+1;

	rank = ALLOCMEMORY(space, NULL, Uint , total);
	result = ALLOCMEMORY(space, NULL, Suffixarray, 1);

	starts = getSuffixPtr(space, multi, 4000, 4001);
	order = quickSortMultikey (space, starts, total,
	cmpIntSequence, total-1, NULL);

	/* invert the sorted order: rank[suffix] = index in suftab */
	pos = 0;
	while (pos < total) {
		rank[order[pos]] = pos;
		pos++;
	}

	result->seq = multi;
	result->numofsuffixes = total;
	result->suffixptr = starts;
	result->suftab = order;
	result->inv_suftab = rank;

	return result;
}
/*
 * Builds the child table of a suffix array from its lcp table.
 *
 * For every entry the fields up, down and nextlIndex are derived with two
 * stack passes over s->lcptab; the shared field val is overwritten with the
 * most recently assigned of the three values.
 *
 * space - handle forwarded to the allocation and stack helpers
 * s     - suffix array; s->lcptab must already be constructed.
 *         s->chldtab is allocated here and zero-initialised.
 *
 * The temporary stack (its storage and the Stack object itself) is released
 * before returning.
 */
void
constructchildtab(void *space, Suffixarray *s) {

  Uint i;
  int lastIndex = -1;
  Stack *stack;

  s->chldtab = ALLOCMEMORY(space, NULL, childtab, s->numofsuffixes);
  memset(s->chldtab, 0, s->numofsuffixes*sizeof(childtab));
  stack = ALLOCMEMORY(space, NULL, Stack, 1);
  initStack(space, stack, 100000);

  /* NOTE(review): the second pass below seeds its stack with 0 while this
   * pass seeds it with 1 -- confirm that the asymmetry is intended. */
  stackpush(space, stack, 1);

  /* first pass: up and down values */
  for(i=1; i < s->numofsuffixes; i++) 
  {
    while(s->lcptab[i] < s->lcptab[stacktop(stack)]) {
      lastIndex = stackpop(stack);
      if(s->lcptab[i] <= s->lcptab[stacktop(stack)] && 
          s->lcptab[stacktop(stack)] != s->lcptab[lastIndex])
      {
        s->chldtab[stacktop(stack)].down = lastIndex;
        
        /* val is shared between the three kinds of entries */
        if (s->chldtab[stacktop(stack)].val != 0) printf("down conflict\n");
        s->chldtab[stacktop(stack)].val  = lastIndex;
      }
    }
    if (lastIndex != -1) {
      s->chldtab[i].up = lastIndex;
      
      if (s->chldtab[i-1].val != 0) printf("up conflict\n");
      s->chldtab[i-1].val = lastIndex;
      lastIndex = -1;
    }
    stackpush(space, stack, i);
  }

  /* second pass: nextlIndex values, on a freshly initialised stack */
  destructStack(space, stack);
  initStack(space, stack, 10000);
  stackpush(space, stack,0);

  for(i=1; i < s->numofsuffixes; i++) {
    while(s->lcptab[i] < s->lcptab[stacktop(stack)]) {
      stackpop(stack);
    }
    if (s->lcptab[i] == s->lcptab[stacktop(stack)]) {
      lastIndex = stackpop(stack);
      s->chldtab[lastIndex].nextlIndex = i;  
      s->chldtab[lastIndex].val = i;
    }
    stackpush(space, stack, i);
  }

  /* destructStack releases only the stack's storage (the object is reused
   * after it above), so the Stack object has to be freed separately */
  destructStack(space, stack);
  FREEMEMORY(space, stack);

  return;
}
예제 #4
0
/*
 * Computes the lcp table of a suffix array.
 *
 * For every suffix with a predecessor in suftab, the length of the common
 * prefix with that predecessor is stored in arr->lcptab at the suffix's
 * rank. The length found for suffix i, minus one, is reused as the starting
 * point for suffix i+1.
 *
 * space - handle forwarded to ALLOCMEMORY
 * arr   - suffix array with suftab, inv_suftab, suffixptr and seq set;
 *         arr->lcptab is allocated here
 *
 * The entry at rank 0 has no predecessor and is set to 0 explicitly; the
 * freshly allocated table is not assumed to be zero-filled, and
 * constructchildtab reads lcptab[0].
 */
void
constructLcp (void *space, Suffixarray *arr)
{
  	Uint i, j, k;
	Uint s,t;
	int l=0;	
	
	arr->lcptab = ALLOCMEMORY(space, NULL, Uint, arr->numofsuffixes);

	/* the loop below never writes rank 0 */
	if (arr->numofsuffixes > 0) {
		arr->lcptab[0] = 0;
	}
	
	for(i=0; i < arr->numofsuffixes; i++) {
	  	j = arr->inv_suftab[i];
		
		if (j > 0) {
		  	k = arr->suftab[j-1];
			/* offsets of both suffixes in the concatenated sequence */
			s = arr->suffixptr[k]-arr->seq->sequences;
			t = arr->suffixptr[i]-arr->seq->sequences;
			
			/* continue from the previous length minus one, never below 0 */
			l=l-1;
			if (l < 0) l=0;
						
			while ((s+l < arr->seq->totallength) && (t+l < arr->seq->totallength) && (arr->seq->sequences[s+l] == arr->seq->sequences[t+l])){ 
				l++;
			}
			arr->lcptab[j] = l;
		}
	}
	return;
}
예제 #5
0
파일: sort.c 프로젝트: spundhir/RNA-Seq
/*
 * Iterative quicksort that returns a sorted index permutation.
 *
 * space  - handle forwarded to ALLOCMEMORY
 * toSort - data to be ordered; only ever passed on to cmp
 * size   - number of elements
 * cmp    - cmp(i, j, toSort, info) returns 2 while element i has to move
 *          past the pivot j from the left, and 1 while it has to move past
 *          it from the right
 * info   - extra argument passed on to cmp
 *
 * Returns a newly allocated array of size indices; toSort is not modified
 * by this function.
 */
Uint *quickSort(void *space, void* toSort, Uint size, 
		Uint (*cmp)(Uint, Uint, void *, void*),
		void *info) {
  VStack pending;
  PairSint range, *top;
  Uint *perm, pivot, tmp, n;
  int lo, hi, up, down;

  /* start with the identity permutation */
  perm = ALLOCMEMORY(space, NULL, Uint, size);
  for (n = 0; n < size; n++) {
    perm[n] = n;
  }

  range.a = 0;
  range.b = size-1;
  bl_vstackInit(&pending, 10000, sizeof(PairSint));
  bl_vstackPush(&pending, &range);

  while (!bl_vstackIsEmpty(&pending)) {
    /* the popped element is freed here after its bounds are copied */
    top = (PairSint *) bl_vstackPop(&pending, NULL);
    lo = top->a;
    hi = top->b;
    free(top);

    while (lo < hi) {
      pivot = perm[(lo+hi)/2];
      up = lo;
      down = hi;

      /* partition around the pivot */
      for (;;) {
        while (cmp(perm[up], pivot, toSort, info) == 2) {
          up++;
        }
        while (cmp(perm[down], pivot, toSort, info) == 1) {
          down--;
        }

        if (up <= down) {
          tmp = perm[down];
          perm[down] = perm[up];
          perm[up] = tmp;
          up++;
          down--;
        }

        if (!(down >= up)) {
          break;
        }
      }

      /* defer one part on the stack, keep working on the other */
      if ((up-lo) > (hi-up)) {
        range.a = lo;
        range.b = down;
        bl_vstackPush(&pending, &range);
        lo = up;
      } else {
        range.a = up;
        range.b = hi;
        bl_vstackPush(&pending, &range);
        hi = down;
      }
    }
  }

  bl_vstackDestruct(&pending, NULL);
  return perm;
}
예제 #6
0
  /*
   * Builds a suffix array over the concatenation of len character
   * sequences; the suffix order is produced by alurusort.
   *
   * space    - handle forwarded to the allocation helpers
   * s        - array of len sequences
   * len      - number of sequences in s
   * alphabet - unused here
   *
   * Progress messages go to stderr. Returns a newly allocated Suffixarray
   * with seq, numofsuffixes, suffixptr, suftab and inv_suftab filled in.
   */
  Suffixarray*
constructSufArr(void *space, 
    CharSequence **s, 
    Uint len, 
    FAlphabet* alphabet)
{
  MultiCharSeq *multi;
  Suffixarray *result;
  char **starts;
  Uint *order;
  Uint *rank;
  Uint numofsuffixes;
  Uint pos;

  /* (char)254 and (char)127 are the two marker characters handed to the helpers */
  multi = concatCharSequences(space, s, len, (char)254, (char)127);
  numofsuffixes = (multi->totallength - multi->numofsequences)+1;

  fprintf(stderr, "alphabet of size (%d): %s\n", multi->mapsize, multi->map);

  rank = ALLOCMEMORY(space, NULL, Uint , numofsuffixes);
  result = ALLOCMEMORY(space, NULL, Suffixarray, 1);

  fprintf(stderr, "constructing suftab.\n");
  starts = getSuffixPtr(space, multi, (char)254, (char)127);

  /* alurusort receives the suffix count by address */
  order = alurusort(space, multi->sequences, &(numofsuffixes));

  fprintf(stderr, "constructing inv_suftab.\n");
  pos = 0;
  while (pos < numofsuffixes) {
    rank[order[pos]] = pos;
    pos++;
  }

  result->seq = multi;
  result->numofsuffixes = numofsuffixes;
  result->suffixptr = starts;
  result->suftab = order;
  result->inv_suftab = rank;

  return result;
}
예제 #7
0
Suffixarray *
readSuffixarray(void *space, 
                char *idxfilename, 
                CharSequence **seqs,
                Uint len) {
  FILE *fp; 
  char **suffixptr; 
  Uint     nmemb = 0,
           idvmemb = 0,
           llvmemb = 0,
           numofsuffixes,
           *suftab = NULL,
           *suflink = NULL;
  childtab *chldtab = NULL; 
  unsigned char flags=0,
                *lcpctab = NULL;
  signed char   *id = NULL;
  PairUint *llvtab = NULL;
  PairSint *idvtab = NULL;
  MultiCharSeq *mseq;
  Suffixarray *s;

  mseq = concatCharSequences(space, seqs, len, (char)254, (char)127);
  numofsuffixes = (mseq->totallength - mseq->numofsequences)+1; 
  suffixptr = getSuffixPtr(space, mseq, (char)254, (char)127);

  fp = fopen(idxfilename, "r");
  if (fp == NULL) {
    fprintf(stderr,"Couldn't open file '%s'. Exit forced.\n", idxfilename);
    exit(-1);
  }

  fread(&nmemb, sizeof(Uint), 1, fp);
  suftab = ALLOCMEMORY(NULL, NULL, Uint, nmemb);
  fread(suftab, sizeof(Uint), nmemb, fp);
  fread(&flags, sizeof(char), 1, fp);

  if (flags & LCP_TAB_STORED) {
    fprintf(stderr, "reading lcpc/vtab\n");
    lcpctab = ALLOCMEMORY(space, NULL, unsigned char, nmemb);
    fread(lcpctab, sizeof(unsigned char), nmemb, fp);
    
    fread(&llvmemb, sizeof(Uint), 1, fp);
    llvtab = ALLOCMEMORY(space, NULL, PairUint, nmemb);
    fread(llvtab, sizeof(PairUint), llvmemb, fp);
  }
예제 #8
0
 /*
  * Iterative quicksort that returns a sorted index permutation; pending
  * sub-ranges are kept as (left, right) pairs on a Stack.
  *
  * space  - handle forwarded to the allocation and stack helpers
  * toSort - data to be ordered; only ever passed on to cmp
  * size   - number of elements
  * cmp    - cmp(i, j, toSort, info) returns 2 while element i has to move
  *          past the pivot j from the left, and 1 while it has to move
  *          past it from the right
  * info   - extra argument passed on to cmp
  *
  * Returns a newly allocated array of size indices.
  */
 Uint *quickSort(void *space, void* toSort, Uint size, 
 					Uint (*cmp)(Uint, Uint, void *, void*),
					void *info) {
	Stack todo;
	Stackelement lo, hi, up, down;
	Uint *perm, pivot, tmp, n;

	/* start with the identity permutation */
	perm = ALLOCMEMORY(space, NULL, Uint, size);
	for (n = 0; n < size; n++) {
		perm[n] = n;
	}

	/* bounds are pushed left first, so they are popped right first */
	initStack(space, &todo, 10000);
	stackpush(space, &todo, 0);
	stackpush(space, &todo, size-1);

	while (!stackisempty(&todo)) {
		hi = stackpop(&todo);
		lo = stackpop(&todo);

		while (lo < hi) {
			pivot = perm[(lo+hi)/2];
			up = lo;
			down = hi;

			/* partition around the pivot */
			for (;;) {
				while (cmp(perm[up], pivot, toSort, info) == 2) {
					up++;
				}
				while (cmp(perm[down], pivot, toSort, info) == 1) {
					down--;
				}

				if (up <= down) {
					tmp = perm[down];
					perm[down] = perm[up];
					perm[up] = tmp;
					up++;
					down--;
				}

				if (!(down >= up)) {
					break;
				}
			}

			/* defer one part on the stack, keep working on the other */
			if ((up-lo) > (hi-up)) {
				stackpush(space, &todo, lo);
				stackpush(space, &todo, down);
				lo = up;
			} else {
				stackpush(space, &todo, up);
				stackpush(space, &todo, hi);
				hi = down;
			}
		}
	}

	destructStack(space, &todo);
	return perm;
 }
예제 #9
0
fasta_t* 
initfasta(void *space) {
  fasta_t *f;

  f = ALLOCMEMORY(space, NULL, fasta_t, 1);
  f->seqs = NULL;
  f->noofseqs = 0;

  return f;
}
  /*
   * Builds a suffix array over the concatenation of len character
   * sequences; the suffix order is produced by quickSortMultikey.
   *
   * space    - handle forwarded to the allocation helpers
   * s        - array of len sequences
   * len      - number of sequences in s
   * alphabet - unused here
   *
   * Progress messages go to stdout. Returns a newly allocated Suffixarray
   * with seq, numofsuffixes, suffixptr, suftab and inv_suftab filled in.
   */
  Suffixarray*
constructSufArr(void *space, 
    CharSequence **s, 
    Uint len, 
    FAlphabet* alphabet)
{
  Uint i, numofsuffixes,
  *sorted, 
  *inv_suftab;
  char **suffixptr;
  MultiCharSeq *mseq;
  Suffixarray *arr;

  /* (char)254 and (char)127 are the two marker characters handed to the helpers */
  mseq = concatCharSequences(space, s, len, (char)254, (char)127);
  numofsuffixes = (mseq->totallength - mseq->numofsequences)+1; 

  /* the byte count is a size_t expression: compute it in size_t width and
   * print both values through explicit unsigned long casts instead of
   * passing them to %d */
  printf("allocating space for %lu suffixes (%lu bytes)\n",
      (unsigned long) numofsuffixes,
      (unsigned long) (2 * sizeof(Uint) * (size_t) numofsuffixes));
  inv_suftab = ALLOCMEMORY(space, NULL, Uint , numofsuffixes);
  arr = ALLOCMEMORY(space, NULL, Suffixarray, 1);


  printf("constructing suftab.\n");
  suffixptr = getSuffixPtr(space, mseq, (char)254, (char)127);
  sorted = quickSortMultikey (space, suffixptr, numofsuffixes, 
      cmpCharSequence, numofsuffixes-1, NULL);

  /* invert the sorted order: inv_suftab[suffix] = index in suftab */
  printf("constructing inv_suftab.\n");
  for (i=0; i < numofsuffixes; i++) {
    inv_suftab[sorted[i]]=i;
  }

  arr->seq = mseq;
  arr->numofsuffixes = numofsuffixes;
  arr->suffixptr = suffixptr;
  arr->suftab = sorted;
  arr->inv_suftab = inv_suftab;

  return arr;
}
예제 #11
0
/*
 * Derives BLAST-style statistics for a query against a sequence database,
 * stores them in imbiss and prints a summary to stdout.
 *
 * space    - handle forwarded to the allocation helpers
 * query    - query sequence
 * seqs     - database sequences
 * noofseqs - number of database sequences
 * alphabet - alphabet; domainsize gives the number of symbols
 * imbiss   - receives score, H (set to 0), K and lambda
 *
 * The score table stays alive because imbiss->score refers to it; the
 * frequency tables and the sort index are released before returning.
 */
void
getimbissblast(void *space, IntSequence *query, IntSequence **seqs, 
	Uint noofseqs, FAlphabet *alphabet, imbissinfo *imbiss){

	double *dbprob, *queryprob, *scores;
	double expected = 0;
	double queryscore = 0;
	double lambda = 0;
	double K = 0;
	Uint *order;
	Uint n;
	evdparam *evd;

	/* symbol frequencies of database and query, and the scores derived from them */
	dbprob = dbfreq(space, seqs, noofseqs, alphabet, 1);
	queryprob = seqfreq(space, query, alphabet);
	scores = logoddscr(space, dbprob, queryprob, alphabet);

	/* expected score per symbol under the database frequencies */
	for (n = 0; n < alphabet->domainsize; n++) {
		expected += dbprob[n]*scores[n];
	}

	/* score of the query against itself */
	for (n = 0; n < query->length; n++) {
		queryscore += scores[query->sequence[n]];
	}

	order = quickSort(space, scores, alphabet->domainsize, cmp_dbl, NULL);

	/* lambda is the root of score_evd within [0,1] */
	evd = ALLOCMEMORY (space, NULL, evdparam, 1);
	evd->noofscores = alphabet->domainsize;
	evd->probs = dbprob;
	evd->scores = scores;

	lambda = uniroot(0, 1, score_evd, 0.0000001, evd);
	FREEMEMORY(space, evd);

	K = relentropy(space, order, scores, alphabet->domainsize, dbprob, lambda);
	if (K <= 0) {
		K = 1;
	}

	imbiss->score = scores;
	imbiss->H = 0;
	imbiss->K = K;
	imbiss->lambda = lambda;

	printf("\nBLAST statistics:\n-------------------\n");
	printf("E(score): %f\n", expected);
	printf("inputscr: %f\n", queryscore);
	printf("lambda: %19.16e\n", lambda);
	printf("check: %19.16e\n",
	checklambda(scores, alphabet->domainsize, dbprob, expected, lambda));
	printf("K: %19.16e\n\n", K);

	FREEMEMORY(space, order);
	FREEMEMORY(space, dbprob);
	FREEMEMORY(space, queryprob);

	return;
}
예제 #12
0
/*
 * LSD radix sort of nelem records of size bytes each, ordered by the Uint
 * key stored at the start of every record.
 *
 * space - handle forwarded to ALLOCMEMORY / FREEMEMORY
 * tosrt - records to sort in place
 * size  - size of one record in bytes; every record is read as starting
 *         with a Uint key, so size must be at least sizeof(Uint)
 * nelem - number of records
 * bits  - key bits consumed per pass; must be smaller than the bit width
 *         of Uint (both shifts below rely on that)
 *
 * A scratch buffer of size*nelem bytes is used; records are scattered
 * between the two buffers once per pass and the result always ends up in
 * tosrt. Empty input returns immediately.
 */
void
bl_radixSortKeyFirst(void *space, void *tosrt, 
			 size_t size, size_t nelem, 
			 Uint bits) {
	
	char *p, *b, *src, *toSort;
	Uint mask, offset=0, i, key, word;
	Uint cntsize;
	Uint *cnt;
	size_t idx;

	/* nothing to do; also keeps (nelem-1)*size from wrapping below */
	if (nelem == 0 || size == 0) return;
	
	toSort = (char*) tosrt;
	cntsize = 1 << bits;
	cnt = ALLOCMEMORY(space, NULL, Uint, cntsize);

	memset(cnt, 0, sizeof(Uint)*cntsize);
	/* allocated with ALLOCMEMORY so that it pairs with the FREEMEMORY
	 * below, like cnt */
	src = ALLOCMEMORY(space, NULL, char, size*nelem);
	b = src;
	
	mask =~ (UINT_MAX<<bits);
	
	for(; mask; mask <<= bits, offset+=bits) {
		/* histogram of the current digit */
		for(p=toSort; p < toSort+(nelem*size); p+=size) {
			/* copy the key out instead of dereferencing a cast pointer:
			 * records need not be aligned for Uint */
			memcpy(&word, p, sizeof(Uint));
			key = (word & mask) >> offset;
			++cnt[key];
		}
		
		/* prefix sums: cnt[k] becomes the end of bucket k */
		for(i=1; i < cntsize; ++i) {
			cnt[i]+=cnt[i-1];
		}
		
		/* scatter back to front to keep the sort stable; indexing from
		 * nelem down to 1 never forms a pointer before the array */
		for(idx=nelem; idx > 0; --idx) {
			p = toSort+((idx-1)*size);
			memcpy(&word, p, sizeof(Uint));
			key = (word & mask) >> offset;
			memmove(b+((cnt[key]-1)*size), p, size);
			--cnt[key];
		}
		
		/* swap the roles of source and destination buffer */
		p=b; b=toSort; toSort=p;
		memset(cnt, 0, sizeof(Uint)*cntsize);
	}
	
	/* after an odd number of passes the result sits in the scratch buffer */
	if(toSort == src) memcpy(b, toSort, size*nelem);
	FREEMEMORY(space, src);
	FREEMEMORY(space, cnt);
	
	return;
}
예제 #13
0
/*
 * Decodes a Cantor pairing code into its two components.
 *
 * space - handle forwarded to the allocation helpers
 * i     - code to decode
 *
 * Returns a newly allocated vector holding the first component at index 0
 * and the second component at index 1.
 */
vector_t *decode_2tupel_cantor(void *space, Uint i) {
	vector_t *pair;
	Uint diag, first, second;

	pair = ALLOCMEMORY(space, NULL, vector_t, 1);
	INITVECTOR(pair);

	/* q_cantor yields the sum of both components; the distance of the code
	 * from f_cantor of that sum is the second component */
	diag = q_cantor(i);
	second = i-f_cantor(diag);
	first = diag-second;

	APPENDVEC(space, pair, first);
	APPENDVEC(space, pair, second);

	return pair;
}
/*
 * Appends the interval (a, b) to the end of list.
 *
 * space - handle forwarded to the allocation helpers
 * list  - list to extend
 * a, b  - bounds stored in a newly allocated PairUint; the pair itself is
 *         passed to insertAfter as the element
 */
void
addinterval(void *space, List *list, Uint a, Uint b) {
  PairUint *interval = ALLOCMEMORY(space, NULL, PairUint, 1);

  interval->a = a;
  interval->b = b;

  if (list->length != 0) {
    /* non-empty list: insert after the current last node */
    insertAfter(space, list, list->lastNode, interval);
  } else {
    /* empty list: there is no node to insert after */
    insertAfter(space, list, LISTNILVALUE, interval);
  }

  return;
}
예제 #15
0
/*
 * LSD radix sort of an array of Uint values.
 *
 * space  - handle forwarded to ALLOCMEMORY / FREEMEMORY
 * toSort - values to sort in place
 * nelem  - number of values
 * bits   - key bits consumed per pass; must be smaller than the bit width
 *          of Uint (both shifts below rely on that)
 *
 * A scratch buffer of nelem values is used; values are scattered between
 * the two buffers once per pass and the result always ends up in the
 * caller's array. Empty input returns immediately.
 */
void
bl_radixSortUint(void *space, Uint *toSort, 
					 size_t nelem, 
					 Uint bits) {
	
	Uint *p, *b, *src;

	Uint mask, offset=0, i, key;
	Uint cntsize;
	Uint *cnt;
	size_t idx;

	/* nothing to do; also keeps nelem-1 from wrapping below */
	if (nelem == 0) return;
	
	cntsize = 1 << bits;
	cnt = ALLOCMEMORY(space, NULL, Uint, cntsize);
	
	memset(cnt, 0, sizeof(Uint)*cntsize);
	/* allocated with ALLOCMEMORY so that it pairs with the FREEMEMORY
	 * below, like cnt */
	src = ALLOCMEMORY(space, NULL, Uint, nelem);
	b = src;
	
	mask =~ (UINT_MAX<<bits);
	
	for(; mask; mask <<= bits, offset+=bits) {
		/* histogram of the current digit */
		for(p=toSort; p < toSort+nelem; ++p) {
			key = (*p & mask) >> offset;
			++cnt[key];
		}
		
		/* prefix sums: cnt[k] becomes the end of bucket k */
		for(i=1; i < cntsize; ++i) {
			cnt[i]+=cnt[i-1];
		}
		
		/* scatter back to front to keep the sort stable; indexing from
		 * nelem down to 1 never forms a pointer before the array */
		for(idx=nelem; idx > 0; --idx) {
			key = (toSort[idx-1] & mask) >> offset;
			b[cnt[key]-1] = toSort[idx-1];
			--cnt[key];
		}
		
		/* swap the roles of source and destination buffer */
		p=b; b=toSort; toSort=p;
		memset(cnt, 0, sizeof(Uint)*cntsize);
	}
	
	/* after an odd number of passes the result sits in the scratch buffer */
	if(toSort == src) memcpy(b, toSort, sizeof(Uint)*nelem);
	FREEMEMORY(space, src);
	FREEMEMORY(space, cnt);
	
	return;
}
예제 #16
0
/*
 * Decodes a Cantor code into an n-tuple by repeatedly splitting off one
 * component with decode_2tupel_cantor.
 *
 * space - handle forwarded to the allocation helpers
 * code  - code to decode
 * n     - number of components to extract
 *
 * Returns a newly allocated vector with n components. For n == 1 the
 * vector holds code itself; for n == 0 it is empty.
 */
vector_t *decodeCantor(void *space, Uint code, Uint n) {
	Uint i;
	vector_t *v = NULL;
	vector_t *r = NULL;
	
	v=ALLOCMEMORY(space, v, vector_t, 1);
	INITVECTOR(v);

	/* n-1 would wrap around for n == 0 */
	if (n == 0) {
		return (v);
	}

	for (i=0; i+1 < n; i++) {
		r = decode_2tupel_cantor(space, code);		
		APPENDVEC(space, v, VECTOR(r,0));
		/* the second component is the code of the remaining tuple */
		code = VECTOR(r,1);

		/* NOTE(review): only the vector object is released here; if
		 * vector_t owns a separate element buffer it has to be freed as
		 * well -- confirm against the vector implementation. */
		FREEMEMORY(space, r);
	}

	/* code now holds the last component; r has already been freed (or was
	 * never assigned when n == 1) and must not be read here */
	APPENDVEC(space, v, code);
	return (v);
}
예제 #17
0
파일: sufmatch.c 프로젝트: spundhir/RNA-Seq
/*
 * Looks up the windows of length sublen that start at the first len-sublen
 * positions of pattern in the suffix array.
 *
 * space   - handle forwarded to ALLOCMEMORY
 * arr     - suffix array to search
 * pattern - pattern of len symbols
 * len     - pattern length
 * sublen  - window length
 *
 * Returns a newly allocated array of len-sublen results, entry i holding
 * the pair returned by mmsearch for the window starting at i, or NULL if
 * len <= sublen.
 */
PairSint*
sufSubstring (void *space, Suffixarray *arr, Uint *pattern,
              Uint len, Uint sublen)
{
    PairSint *hits;
    PairSint found;
    Uint windows;
    Uint start;

    if (len <= sublen) {
        return NULL;
    }

    windows = len-sublen;
    hits = ALLOCMEMORY(space, NULL, PairSint, windows);

    for (start = 0; start < windows; start++) {
        found = mmsearch(arr, pattern + start, sublen, 0, 0, arr->numofsuffixes-1);
        hits[start].a = found.a;
        hits[start].b = found.b;
    }

    return hits;
}
예제 #18
0
파일: sufarray.c 프로젝트: spundhir/RNA-Seq
      if(!silent) MSG("reading uint id.\n");
      fread(idvutab, sizeof(PairUint), idvmemb, fp);
      for(idvi=0; idvi < idvmemb; idvi++) {
        idvtab[idvi].a = idvutab[idvi].a;
        idvtab[idvi].b = idvutab[idvi].b;
      }
      free(idvutab);
    }
  }

  if ((flags & MD5_STORED)) {
    mdfive = ALLOCMEMORY(space, NULL, unsigned char, 16);
    fread(mdfive, sizeof(unsigned char), 16, fp);
  }

  s = ALLOCMEMORY(space, NULL, Suffixarray, 1);
        
  if ((flags & LINT_SUFLINKS)) 
  s->llint = 1; else s->llint=0;
  s->suftab = suftab;
  s->seq = mseq;
  s->numofsuffixes = numofsuffixes;
  s->lcpctab = lcpctab;
  s->llvtab = llvtab;
  s->llvcnt = llvmemb;
  s->inv_suftab=NULL;
  s->chldtab = chldtab;

#ifdef SUFLINK_MMAP
  s->suflink = (Uint*) &suflinkptr[pagediff_sl];
  s->id = &id[pagediff_id];
예제 #19
0
/*
 * Test driver for the suffix array code.
 *
 * Builds the suffix array, lcp table, child table and suffix links for one
 * hard-coded sequence, dumps the tables, prints the child intervals of the
 * interval [0,5] and writes the per-position hit counts to "out.xy".
 *
 * Reading sequences from a FASTA file and queries from a CSV file is
 * currently disabled: set and csv stay NULL and lines stays 0, so the query
 * loop below is skipped.
 *
 * NOTE(review): some calls receive &space and others space, as in the
 * original driver -- confirm which one the helpers expect.
 */
int main(int argc, char** argv) {
  Uint i, j, k, l, lines=0;
  stringset_t *set = NULL, **csv = NULL;
  CharSequence **s;
  Suffixarray *sarray;
  PairSint d;
  Uint totallength = 0;
  Uint wsize=10;
  Uint counter=0;
  Uint all=0;
  int *space = NULL;
  char *pattern3= "ACAAACATAT";
  time_t startsuf, endsuf; 
  double difsuf;
  List *list;
  PairUint **childinterval;
  double *genome;

  (void) argc;
  (void) argv;

  /* d is compared after the search loops even when no search ran */
  d.a = 0;
  d.b = 0;

  s = ALLOCMEMORY(&space, NULL, CharSequence *, 1);
  s[0] = ALLOCMEMORY(&space, NULL, CharSequence, 1);
  s[0]->sequence = pattern3;
  s[0]->length = strlen(pattern3);

  /* total length of all indexed sequences; it sizes the hit counters and
   * is reported at the end */
  totallength = s[0]->length;

  genome = ALLOCMEMORY(&space, NULL, double, totallength);
  memset(genome, 0, sizeof(double)*totallength);

  /* time the construction of the suffix array and its tables */
  time (&startsuf);
  sarray = constructSufArr(&space, s, 1, NULL); 
  constructLcp(space, sarray);
  dumplcptab(sarray);
  constructchildtab(space, sarray);
  time (&endsuf);
  difsuf = difftime (endsuf, startsuf);

  printf("noofsuffixes: %d\n", sarray->numofsuffixes);

  dumpchildtab(sarray);
  dumpSufArr(sarray);
  
  list = getChildintervals(space, sarray, 0, 5);
  childinterval = (PairUint**) dataList(space, list);
  for(i=0; i < list->length; i++) {
    printf("[%d,%d]\n", childinterval[i]->a, childinterval[i]->b);
  }
  
  constructsuflinks(space, sarray);

  /* every second csv line is a query; queries longer than 8 symbols are
   * searched either as a whole or with a sliding window of wsize */
  for(k=1; k < lines; k+=2) {
    if(csv[k]->strings[0].len > 8) {  
      if(wsize > csv[k]->strings[0].len) {
        d=mmsearch(sarray, csv[k]->strings[0].str, csv[k]->strings[0].len, 0, 0, sarray->numofsuffixes-1);
        for  (j=d.a; j <= d.b; j++) {
          genome[sarray->suftab[j]]++;
        }
      } else {
        for(l=0; l < csv[k]->strings[0].len-wsize; l++) { 
          d=mmsearch(sarray, &csv[k]->strings[0].str[l], wsize, 0, 0, sarray->numofsuffixes-1);
          for  (j=d.a; j <= d.b; j++) {
            genome[sarray->suftab[j]]++;
          } 
        }
      }
      /* counts queries whose last search returned an interval with a < b */
      if (d.a < d.b) counter++;
      all++;
    }
  }
  
  /* set is only non-NULL once FASTA input is enabled again */
  if (set != NULL) {
    destructStringset(&space, set);
  }
  writeY("out.xy", genome, totallength);

  printf ("sliding windows of %d sequences (of %d) found\n", counter, all);
  printf ("Building  the suffixarray has taken %f seconds.\n", difsuf);
  printf ("Total length of suffixarray was %d\n", totallength);
  return EXIT_SUCCESS;
}
예제 #20
0
파일: sufarray.c 프로젝트: spundhir/RNA-Seq
Suffixarray *
readSuffixarray(void *space, 
    char *idxfilename, 
    CharSequence **seqs,
    Uint len,
    unsigned char silent) {
  FILE *fp; 
  Uint     nmemb = 0,
           idvmemb = 0,
           llvmemb = 0,
           numofsuffixes,
           *suftab = NULL,
           idvi =0;
  childtab *chldtab = NULL; 
  unsigned char flags=0,
                *lcpctab = NULL;
  unsigned char *mdfive=NULL,
                *check=NULL;
  PairUint *llvtab = NULL;
  PairLSint *idvtab = NULL;
  PairSint *idvutab = NULL;

  MultiCharSeq *mseq;
  Suffixarray *s;

#ifdef SUFLINK_MMAP
  int fd;
  signed char   *id = NULL;
  long curiopos, offset;
  struct stat sb;
  char *suflinkptr;
  int pagediff_id;
  int pagediff_sl;
#elif SUFLINK_DISKACC
  int fd;
  off_t off_sl;
  off_t off_id;
#else
  signed char   *id = NULL;
  Uint *suflink = NULL;
#endif
  
  mseq = concatCharSequences(space, seqs, len, (char)126, (char)127);
  numofsuffixes = mseq->totallength; 

  fp = fopen(idxfilename, "r");
  if (fp == NULL) {
    DBG("Couldn't open file '%s'. Exit forced.\n", idxfilename);
    exit(-1);
  }

  fread(&nmemb, sizeof(Uint), 1, fp);
  suftab = ALLOCMEMORY(NULL, NULL, Uint, nmemb);
  fread(suftab, sizeof(Uint), nmemb, fp);
  fread(&flags, sizeof(char), 1, fp);

  if (flags & LCP_TAB_STORED) {
    if (!silent) MSG("reading lcpc/vtab.\n");
    lcpctab = ALLOCMEMORY(space, NULL, unsigned char, nmemb);
    fread(lcpctab, sizeof(unsigned char), nmemb, fp);

    fread(&llvmemb, sizeof(Uint), 1, fp);
    llvtab = ALLOCMEMORY(space, NULL, PairUint, nmemb);
    fread(llvtab, sizeof(PairUint), llvmemb, fp);
  }

  if (flags & CHLD_TAB_STORED) {
    if(!silent) MSG("reading childtab.\n");
    chldtab = ALLOCMEMORY(space, NULL, childtab, nmemb);
    fread(chldtab, sizeof(childtab), nmemb, fp);
  }

  if ((flags & SUFLINK_TAB_STORED)) {
    if(!silent) MSG("reading suflinks.\n");

#ifdef SUFLINK_MMAP 
    curiopos = ftell(fp);
    fd = open(idxfilename, O_RDONLY);
    if (fd == -1) {
      perror("open");
      exit(EXIT_FAILURE);
    }

    if (fstat(fd, &sb) == -1) {       
      perror("fstat");
      exit(EXIT_FAILURE);
    }

    offset = curiopos & ~(sysconf(_SC_PAGE_SIZE) - 1);
    if (curiopos >= sb.st_size) {
      fprintf(stderr, "offset is past end of file\n");
      exit(EXIT_FAILURE);
    }
    
    pagediff_sl = curiopos - offset;   
    suflinkptr = mmap(0, nmemb*sizeof(Uint) + pagediff_sl, PROT_READ, MAP_SHARED, fd, offset);

    if (suflinkptr == MAP_FAILED) {
      perror("mmap");
      exit(EXIT_FAILURE);
    }
#elif SUFLINK_DISKACC 
    sl_diskacc = 1;
    off_sl = ftell(fp);
    fd = open(idxfilename, O_RDONLY);
#else
    suflink = ALLOCMEMORY(space, NULL, Uint, nmemb);
    fread(suflink, sizeof(Uint), nmemb, fp);
#endif

#ifdef SUFLINK_MMAP
    offset = (curiopos+(nmemb*sizeof(Uint))) & ~(sysconf(_SC_PAGE_SIZE) - 1);
    if (curiopos >= sb.st_size) {
      fprintf(stderr, "offset is past end of file\n");
      exit(EXIT_FAILURE);
    }
    
    pagediff_id = (curiopos+(nmemb*sizeof(Uint))) - offset;   
    id = mmap(0, nmemb*sizeof(signed char) + pagediff_id, PROT_READ, MAP_SHARED, fd, offset);

    if (id == MAP_FAILED) {
      perror("mmap");
      exit(EXIT_FAILURE);
    }
    fseek(fp, nmemb*(sizeof(Uint)+sizeof(signed char)), SEEK_CUR); 

#elif SUFLINK_DISKACC
    off_id = off_sl+(nmemb*sizeof(Uint));
    fseek(fp, nmemb*(sizeof(Uint)+sizeof(signed char)), SEEK_CUR);    
#else   
    id = ALLOCMEMORY(space, NULL, signed char, nmemb);
    fread(id, sizeof(signed char), nmemb, fp);
#endif

    fread(&idvmemb, sizeof(Uint), 1, fp);
    idvtab = ALLOCMEMORY(space, NULL, PairLSint, idvmemb);
    if ((flags & LINT_SUFLINKS)) {
      if(!silent) MSG("reading lsint id.\n");
      fread(idvtab, sizeof(PairLSint), idvmemb, fp);
    } else { 
      idvutab = ALLOCMEMORY(space, NULL, PairSint, idvmemb);
      if(!silent) MSG("reading uint id.\n");
      fread(idvutab, sizeof(PairUint), idvmemb, fp);
      for(idvi=0; idvi < idvmemb; idvi++) {
        idvtab[idvi].a = idvutab[idvi].a;
        idvtab[idvi].b = idvutab[idvi].b;
      }
      free(idvutab);
    }
  }

  if ((flags & MD5_STORED)) {
    mdfive = ALLOCMEMORY(space, NULL, unsigned char, 16);
    fread(mdfive, sizeof(unsigned char), 16, fp);
  }
예제 #21
0
파일: kdmatch.c 프로젝트: spundhir/RNA-Seq
/*
 * Matches every read of a read set against a suffix array, on both strands.
 *
 * For each read of at least minsize characters, seeds are computed with
 * kdseeds for the read and for its complement (charDNAcomplement). Unless
 * matchingstat is set, the seeds are aligned with alignkdmatches and the
 * result is reported through reportMatch on dev; with matchingstat set the
 * seeds are only dumped with dumpkdseeds.
 *
 * space         - handle forwarded to the allocation helpers
 * s             - suffix array to search
 * reads         - reads to match; maxlen is used to size the bit matrices
 * minsize       - reads shorter than this are not matched
 * outfile       - not used in this function
 * counter       - if NULL a progress bar is shown; otherwise the counter is
 *                 incremented per read (unless silent) and the file-level
 *                 mutexes mutex1/mutex2 are used for output
 * silent        - suppresses progress reporting
 * s_ext, p_mis, Xoff, k_p - passed through to kdseeds
 * rep_type, showalignment - passed through to reportMatch
 * hitstrategy   - 1 sets beststrand, 2 sets beststrand and best
 * bedist, maxevalue, acc, M - passed through to alignkdmatches
 * matchingstat  - dump seeds instead of aligning
 * dev           - stream for reported matches
 * nomatchdev    - if non-NULL, reads with no match on either strand are
 *                 written here
 */
void
matchkdseed( void *space, 
            Suffixarray *s, 
            fasta_t *reads, 
            Uint minsize,
            char *outfile,
            Uint *counter,
            unsigned char silent,
            Uint s_ext,
            Uint p_mis,
            Uint Xoff,
            Uint k_p,
            Uint rep_type,
            Uint hitstrategy,
            Uint bedist,
            unsigned char showalignment,
            double maxevalue,
            int acc,      
            Uint M,
            unsigned char matchingstat,
            FILE *dev,
            FILE *nomatchdev) {

  double   H,
           K,
           lambda;
  char     *buffer, 
           *curseq;
  Uint     k,
           curlen,
           dim,
           wordno;
  bitvector *D,
            *Mv;
  Gmap      map;
  gread_t   read;
  gmatch_t  *mmatches=NULL,
            *pmatches=NULL;
  matchstem_t *V;

  int plusdiff, minusdiff, noofmatches=0;


  
  pthread_mutex_t *mtx=NULL;
  pthread_mutex_t *mtx2=NULL;
  Uint *enctab, i,j, nmmatch, npmatch;

  int bmscr, bpscr;
  unsigned char uninformative = 0,
                beststrand = 0,
                best = 0;
    
  /* translate the hit strategy into the two flags used below */
  if (hitstrategy == 2) { 
    beststrand = 1;
    best = 1;
  } else if (hitstrategy == 1) {
    beststrand = 1;
  }


  /*build alignment matrix*/
  enctab = encodetab(s->seq->map, s->seq->mapsize);
  /* NOTE(review): (acc*reads->maxlen)/100 is an integer division, so the
   * ceil() around it has no effect -- confirm this is intended */
  dim = reads->maxlen + 2*((reads->maxlen-ceil((acc*reads->maxlen)/100))+4);
  wordno = reads->maxlen/BITVECTOR_WORDSIZE;
//  wordno += ((reads->maxlen & (BITVECTOR_WORDSIZE-1)) > 0) ? 1 : 0;
  wordno++;

  /* one allocation holds both matrices: D in the first dim+1 entries and
   * Mv in the second dim+1 entries */
  D = ALLOCMEMORY(space, NULL, bitvector, 2*(dim+1));
  Mv = &D[dim+1];

  for(i=0; i <= dim; i++) {
    D[i]  = initbitvector(space, wordno*BITVECTOR_WORDSIZE);
    Mv[i]  = initbitvector(space, wordno*BITVECTOR_WORDSIZE);
  }  

  /* without a shared counter a progress bar is shown; with one, the
   * mutexes are set up for the output calls below */
  if (counter == NULL) {
    initProgressBarVT();
  } else {
    mtx = &mutex1;
    mtx2 = &mutex2;
  }

  karlinunitcostpp(space, &lambda, &H, &K);
  
  for (k=0; k < reads->noofseqs; k++) {
    plusdiff = 0;
    minusdiff = 0;
    noofmatches = 0;

    if (!silent) {
      if (counter == NULL) {
        progressBarVT("reads matched.", reads->noofseqs, k, 25);
      } else {
        /* NOTE(review): incremented without taking a lock -- confirm the
         * counter is not shared between threads */
        (*counter)++;
      }
    }

    curseq = reads->seqs[k]->sequence;
    curlen = reads->seqs[k]->length; 
    npmatch = 0;
    nmmatch = 0;

    if(curlen >= minsize) {  
      initGmap(&map, s->seq, 1);
      initRead(&read, reads->seqs[k]);
      bpscr = 0;
      bmscr = 0;
      
      /* seeds for the read as given (plus strand) */
      V=kdseeds(space, s, curseq, curlen, s_ext, p_mis, Xoff, k_p);
#ifdef KDUNINFORMATIVE
      /* the first branch of the first seed spans more than M suffixes:
       * the read is flagged and not aligned on this strand */
      if(V[0].branches[0].r > V[0].branches[0].l && V[0].branches[0].r-V[0].branches[0].l > M) {
        uninformative = 1;
        plusdiff = V[0].branches[0].r - V[0].branches[0].l;
      } else 
#endif
      /* when KDUNINFORMATIVE is defined this if is the else-branch of the
       * check above */
      if(!matchingstat) {
        uninformative = 0;
        pmatches = alignkdmatches(space, s, V, reads->seqs[k], curseq, curlen, M, enctab, beststrand, 
            bedist, lambda, H, K, maxevalue, acc, D, dim, &npmatch, &bpscr);
 
        if(npmatch > 0) {
            setMatches(&read, pmatches, npmatch, PLUSSTRAND);
        }

      } else {
        printf("#%d %s\n",curlen, reads->seqs[k]->description);
        dumpkdseeds(s, V, curlen, '+', M);
      }

      /* release the seeds of the plus strand */
      for(j=0; j < curlen; j++) {
        if (V[j].noofbranches > 0) {
            FREEMEMORY(space, V[j].branches);
        }
      }
      FREEMEMORY(space, V);

      /*search the complement*/
      buffer = charDNAcomplement(space, curseq, curlen);
      V=kdseeds(space, s, buffer, curlen, s_ext, p_mis, Xoff, k_p);
#ifdef KDUNINFORMATIVE
      /* skipped on this strand only if the plus strand was flagged too */
      if(uninformative && V[0].branches[0].r > V[0].branches[0].l && V[0].branches[0].r-V[0].branches[0].l > M) {
        minusdiff = V[0].branches[0].r - V[0].branches[0].l;
        uninformative = 1;
      } else
#endif
      /* when KDUNINFORMATIVE is defined this if is the else-branch of the
       * check above */
      if(!matchingstat) {
        uninformative = 0;
        mmatches = alignkdmatches(space, s, V, reads->seqs[k], buffer, curlen, M, enctab, beststrand, 
            bedist, lambda, H, K, maxevalue, acc, D, dim, &nmmatch, &bmscr);

        if(nmmatch > 0) {  
          setMatches(&read, mmatches, nmmatch, MINUSSTRAND);
        }

        /* with best set both strands share the smaller score; without
         * beststrand both are set to the same value derived from acc */
        if (best) {
          bpscr = MIN(bmscr, bpscr);
          bmscr = bpscr;
        } else if (!beststrand) {
          bpscr = bmscr = curlen-ceil((acc*curlen)/100);
        }

        /* matches of both strands are reported together */
        setReads(&map, &read, 1);
        reportMatch(dev, &map, rep_type, showalignment, mtx, bpscr, bmscr);
       
        FREEMEMORY(space, pmatches);
        FREEMEMORY(space, mmatches);
        pmatches = NULL;
        mmatches = NULL;

      } else {
        dumpkdseeds(s, V, curlen, '-', M); 
      }

      /* release the seeds of the minus strand and the complement buffer */
      for(j=0; j < curlen; j++) {
        if (V[j].noofbranches > 0) {
          FREEMEMORY(space, V[j].branches);
        }
      }
      FREEMEMORY(space, V);
      FREEMEMORY(space, buffer);  
    }
      
    /* reads without any match are written to nomatchdev; reads shorter
     * than minsize also arrive here with both counts at 0 */
    if(nomatchdev && nmmatch == 0 && npmatch == 0) {   
        if (mtx2 != NULL) pthread_mutex_lock(mtx2);
        fprintf(nomatchdev, "%s\n%s\n", reads->seqs[k]->description, reads->seqs[k]->sequence);
        fflush(nomatchdev);
        if (mtx2 != NULL) pthread_mutex_unlock(mtx2);
    }
  }
  /* Mv lives inside the D allocation, so both are released together */
  wrapBitmatrix(space, D, 2*(dim+1));
  FREEMEMORY(space, D);
  FREEMEMORY(space, enctab);
  return;
}
예제 #22
0
파일: kdmatch.c 프로젝트: spundhir/RNA-Seq
gmatch_t*
alignkdmatches(void *space,
    Suffixarray *s, 
    matchstem_t *M, 
    CharSequence *query,
    char *curseq,
    Uint m,
    Uint t,
    Uint *enctab,
    unsigned char bestonly,
    Uint bedist,
    double lambda,
    double H,
    double K,
    double maxevalue,
    int acc,
    bitvector* D,
    Uint dim,
    Uint* nmatch,
    int* bscr) {

  Uint k,j,l,r,q,noofmatches=0, mat, mis, ins, del; 
  Lint pos, margin, schr, echr, sstart, slen, i ;
  char *sseq;
  unsigned int idx;
  Alignment *al=NULL;
  bitvector *peq;
  PairSint result;
#ifdef ALIGNDBG
  PairSint result2;
  CharSequence *checkseq;
#endif
  double E;
  int scr;
  int maxedist = 0;
  int bestscr = 0;

  gmatch_t *matches=NULL;

  margin = bestscr = maxedist = m-ceil((acc*m)/100);
  peq = getpeq(NULL, curseq, m, s->seq->map, s->seq->mapsize, enctab);

  for(i=0; i < m; i++) {
    for(q=0; q < M[i].noofbranches; q++) {
      l = M[i].branches[q].l; r = M[i].branches[q].r;
      scr =  M[i].branches[q].mat - (M[i].branches[q].mis+M[i].branches[q].ins+M[i].branches[q].del); 
      E = evalue(lambda, K, spacemult(m, s->numofsuffixes, H, K), scr); 

      if(l <= r && E <= maxevalue && (r-l) <= t) {
        for(j=l; j <= r; j++) {
          pos = s->suftab[j];

          /*skip marginal matches*/
          for(k=0; k < noofmatches; k++) 
            if (abs((signed int)matches[k].p-(pos-(i+margin))) <= margin) break;

          if (k == noofmatches) {

            idx = getMultiCharSeqIndex(s->seq, &s->seq->sequences[pos]);
            schr = (idx > 0) ? s->seq->markpos[idx-1]+1 : 0;
            echr = s->seq->markpos[idx];
            assert(echr >=  pos);
            sstart = MAX(schr, pos-(i+margin));
            slen = (echr > sstart+m+2*(margin+1)) ? m+2*(margin+1) : (echr-sstart)+1;  
            sseq = &s->seq->sequences[sstart];
/*            
            if(echr == pos) {
              fprintf(stderr, "\n\n sstart:%lld, slen:%lld, ssend:%lld, echr:%lld\n\n",
                  sstart, slen, sstart+slen-1, echr);
            }
*/
            myersbitmatrix(NULL, curseq, m, sseq, slen, s->seq->map, 
                s->seq->mapsize, enctab, m-bestscr, peq, &result, D, slen);

#ifdef ALIGNDBG
            result2 = myersbitvector(NULL, curseq, m, sseq, slen, 
                s->seq->map, s->seq->mapsize, enctab, m-bestscr, peq);
            assert(result.a == result2.a && result.b == result2.b);
#endif

            if (result.a != -1 && result.b <= maxedist 
                && result.b <= bestscr && result.a < slen) {  
              al = ALLOCMEMORY(space, NULL, Alignment, 1);
              initAlignment(al, curseq, m, 0, sseq, slen, 0);
              bitvectorbacktrack(al, D, slen, m, result.a);
#ifdef ALIGNDBG
              assert(getEdist(al) == result.b);
              checkseq = (CharSequence*) s->seq->ref[idx].ref;
              assert(strncmp(& checkseq->sequence[pos-schr],
                    &s->seq->sequences[pos], slen) == 0);
#endif
              countEops(al, &mat, &mis, &del, &ins);

              /*skip identical matches*/
              for(k=0; k < noofmatches; k++) {
                if (matches[k].p == sstart+al->voff) break;
              }

              if (k == noofmatches) {
                matches=realloc(matches, sizeof(gmatch_t)*(noofmatches+1));
                matches[noofmatches].p = sstart+al->voff;
                matches[noofmatches].q = sstart+result.a-1;
                matches[noofmatches].edist = result.b;
                matches[noofmatches].i = i; 
                matches[noofmatches].j = i+M[i].branches[q].mat+
                  M[i].branches[q].mis+M[i].branches[q].ins-1;
                matches[noofmatches].scr = scr;
                matches[noofmatches].evalue = E;
                matches[noofmatches].mat = mat;
                matches[noofmatches].mis = mis;
                matches[noofmatches].ins = ins;
                matches[noofmatches].del = del;
                matches[noofmatches].subject = idx;
                matches[noofmatches].checklen = matches[noofmatches].j; 
                matches[noofmatches].al = al;
                noofmatches++;
                if(bestonly) {
                  bestscr = MIN(maxedist, (result.b+bedist));
                } 
              } else {
                wrapAlignment(al);
                FREEMEMORY(space, al);
              }
            }
          }
        }
      }
    }
  }


  for(i=0; i < s->seq->mapsize; i++) {
    FREEMEMORY(space, peq[i]);
  }

  FREEMEMORY(space, peq);      
  (*bscr)  = bestscr;
  (*nmatch) = noofmatches;

  return matches;
}
예제 #23
0
/*
 * Command-line driver: loads a sequence database, builds a suffix array
 * over it and then matches and ranks one query (-q) or a batch of queries
 * (-b) against that database.
 *
 * Options as handled by the switch below:
 *   -s FILE  list of database sequence files (required)
 *   -a FILE  alphabet file (required)
 *   -q FILE  single query file            -b FILE  batch file of queries
 *   -l N     substring length             -p N     substring length as a
 *                                                  percentage of the query
 *   -c N     minimum number of seeds      -n N     number of hits
 *   -m N     maximum number of matches    -x FILE  substitution matrix
 *   -M N     swscores[0]                  -D N     swscores[1]
 *   -r FILE  report file                  -v       verbose
 *   -d       set depictsw                 -g       gnuplot consensus
 *   -w       clear the wurst flag         -L       latexscores handler
 *   -B -S -A -F -G  choose the filter/select function pair
 *   -h       usage
 *
 * Returns EXIT_SUCCESS; exits with EXIT_FAILURE on bad usage or when the
 * url buffers cannot be allocated.
 */
int
main (int argc, char** argv)
{
	Sint optindex, c;
	unsigned char depictsw=0;
	unsigned char wurst=1;
	unsigned char gnuplot=0;

	Uint i, j, noofseqs=0, nooffreqs=0, noofqueries=0;
	Uint noofhits=100;
	Uint substrlen = 10;
	Uint minseeds = 5;
	Uint maxmatches = 10000;
	char *vec, *bin;
	/* sizes of the url buffers: fixed prefix + 5 characters + suffix + NUL */
	const size_t veclen = 66;
	const size_t binlen = 54;
	int written;
	imbissinfo *imbiss;
	void *space = NULL;
	double *scores = NULL;

	int swscores[2]={3,-2};
	char *pveclistfile=NULL;
	char *alphabetfile=NULL;
	char *inputfile=NULL;
	char *batchfile = NULL;
	char *subfile=NULL;
	char *reportfile = NULL;

	int (*handler)
	  (void *, Matchtype *, IntSequence **, Uint,
	   Uint, void *) = allscores;

	double (*filter)
	  (void *, Matchtype *, IntSequence *, IntSequence *,
	   Uint *, Uint, Uint, void *) = swconstfilter;

	Matchtype* (*select)
	  (void *, Matchtype *, Uint k,
	   IntSequence *, IntSequence **, void *) = selectSW;


	stringset_t **fn, **freq, *queryurl, **queries=NULL;
	Suffixarray *arr = NULL;
	IntSequence **sequences = NULL;
	IntSequence *input = NULL;
	FAlphabet *alphabet = NULL;
	PairSint *matches=NULL;
	Uint percent=0;

	time_t startsuf, endsuf;
	double difsuf, difmatch, difrank;

#ifdef MEMMAN_H
	Spacetable spacetab;
	initmemoryblocks(&spacetab, 100000);
	space = &spacetab;
#endif

	/* parse the command line */
	while(1)
	{
		c=getopt_long(argc, argv, "SAghFGBLM:D:r:m:x:n:p:b:s:a:q:l:c:dvw",
						long_options, &optindex);
		if (c==-1) break;

		switch(c) {
			case 'r':
				reportfile=optarg;
				break;
			case 'v':
				verbose_flag=1;
				break;
			case 'd':
				depictsw = 1;
				break;
			case 's':
				pveclistfile = optarg;
				break;
			case 'a':
				alphabetfile = optarg;
				break;
			case 'q':
				inputfile = optarg;
				noofqueries = 1;
				break;
			case 'l':
				substrlen = atoi(optarg);
				break;
			case 'c':
				minseeds = atoi(optarg);
				break;
			case 'b':
				batchfile = optarg;
				break;
			case 'p':
				percent = atoi(optarg);
				break;
			case 'x':
				subfile = optarg;
				break;
			case 'n':
				noofhits = atoi(optarg);
				break;
			case 'w':
				wurst = 0;
				break;
			case 'B':
				filter = scorefilter;
				select = selectBlastScore;
				break;
			case 'S':
				filter = scorefilter;
				select = selectScore;
				break;
			case 'A':
				filter = swconstfilter;
				select = selectSW;
				break;
			case 'F':
				filter = scorefilter;
				select = selectScoreSWconst;
				break;
			case 'G':
				filter = scorefilter;
				select = selectBlastScoreSWconst;
				break;
			case 'M':
				swscores[0]=atoi(optarg);
				break;
			case 'L':
				handler = latexscores;
				break;
			case 'D':
				swscores[1]=atoi(optarg);
				break;
			case 'g':
				gnuplot = 1;
				break;
			case 'm':
				maxmatches=atoi(optarg);
				break;
			case 'h':
			default:
				usage(argv[0]);
				exit (EXIT_FAILURE);
		}
	}

	/* database list, alphabet and at least one query source are mandatory */
	if (pveclistfile==NULL || (inputfile == NULL && batchfile==NULL)
		|| alphabetfile == NULL) {
		usage(argv[0]);
		exit (EXIT_FAILURE);
	}

	imbiss = ALLOCMEMORY(space, NULL, imbissinfo, 1);
	imbiss->reportfile = reportfile;
	imbiss->swscores = swscores;
	imbiss->noofhits = noofhits;
	imbiss->minseeds = minseeds;
	imbiss->wurst = wurst;

	/*read batch file; this overrides the query count set by -q*/
	if (batchfile) {
		queries = readcsv(space, batchfile, "", &noofqueries);
	}

	/*read substitution matrix; row 0 and column 0 of the csv are skipped,
	 *the literal "inf" is stored as 0*/
	if (subfile) {
		freq=readcsv(space, subfile,",", &nooffreqs);
		scores = ALLOCMEMORY(space, NULL, double, ((nooffreqs-1)*(nooffreqs-1)) );
		/* NOTE(review): i and j run from 1 to nooffreqs-1 while scores holds
		 * (nooffreqs-1)^2 entries; depending on how MATRIX2D computes the
		 * offset the last row/column may lie outside the allocation - confirm. */
		for(i=1; i < nooffreqs; i++) {
			for(j=1; j < nooffreqs; j++) {
				if(strcmp(SETSTR(freq[i],j),"inf")==0){
					MATRIX2D(scores, nooffreqs-1, i, j)=0;
				}else{
					MATRIX2D(scores, nooffreqs-1, i, j)=atof(SETSTR(freq[i],j));
				}
			}
		}
	}

	/*read alphabet*/
	if (alphabetfile != NULL) {
		alphabet = loadCSValphabet(space, alphabetfile);
		sortMapdomain(space, alphabet);
	}


	/*load sequence database*/
	fn=readcsv(space, pveclistfile, "", &noofseqs);
	sequences = ALLOCMEMORY(space, NULL, IntSequence *, noofseqs);
	for(i=0; i < noofseqs; i++)
	{
		sequences[i] = loadSequence(space, SETSTR(fn[i],0));
	}

	for (i=0; i < noofseqs; i++) {
		destructStringset(space, fn[i]);
	}
	FREEMEMORY(space, fn);


	/*construct the suffix array*/
	time (&startsuf);
	arr = constructSufArr(space, sequences, noofseqs, NULL);
	constructLcp(space, arr);
	time (&endsuf);
	difsuf = difftime (endsuf, startsuf);


	/*do search*/
	for (i=0; i < noofqueries; i++) {

		/*get query from batchfile*/
		if (queries) {
			inputfile = SETSTR(queries[i],0);
		}

		/* The query has to be loaded before its length is consulted:
		 * before the first load input is NULL, and on later iterations it
		 * still points at the sequence destructed at the end of the
		 * previous one. */
		input = loadSequence(space, inputfile);

		/*typically only used with batchfile*/
		if (percent != 0) {
			substrlen =
			  ((double)((double)input->length/100)*(double) percent);
		}

		printf(">IMBISS order delivered\n");
		printf("%s\n", input->url);

		time (&startsuf);
		matches=sufSubstring(space, arr, input->sequence, input->length, substrlen);
		time (&endsuf);
		difmatch = difftime (endsuf, startsuf);

		/*get prob vector url for salami/wurst*/
		vec = malloc(sizeof(char)*veclen);
		bin = malloc(sizeof(char)*binlen);
		if (vec == NULL || bin == NULL) {
			fprintf(stderr, "out of memory while building query urls\n");
			exit (EXIT_FAILURE);
		}

		/* NOTE(review): assumes input->url has at least 56 characters -
		 * confirm against loadSequence. "%5s" is a minimum width only, so
		 * the write is bounded by the buffer size to avoid a heap overflow
		 * on longer url tails. */
		written = snprintf(vec, veclen,
			"/smallfiles/public/no_backup/bm/pdb_all_vec_6mer_struct/%5s.vec",
			input->url+56);
		if (written < 0 || (size_t) written >= veclen) {
			fprintf(stderr, "warning: vector url truncated for %s\n", input->url);
		}
		written = snprintf(bin, binlen,
			"/smallfiles/public/no_backup/bm/pdb_all_bin/%5s.bin",
			input->url+56);
		if (written < 0 || (size_t) written >= binlen) {
			fprintf(stderr, "warning: binary url truncated for %s\n", input->url);
		}

		queryurl = initStringset(space);
		addString(space, queryurl, bin, strlen(bin));
		addString(space, queryurl, vec, strlen(vec));


		getimbissblast(space, input, sequences, noofseqs, alphabet, imbiss);


		imbiss->query = queryurl;
		imbiss->substrlen = substrlen;
		imbiss->alphabet = alphabet;

		/*if a substitution file was given ...*/
		if (subfile) {
			imbiss->sub = createsubmatrix(scores, imbiss->score, nooffreqs-1);
		}

		/*match 'n' report*/
		time (&startsuf);

		/* NOTE(review): input->length-substrlen is unsigned and wraps when
		 * substrlen exceeds the query length - confirm callers prevent it. */
		imbiss->consensus = ALLOCMEMORY(space, NULL, Uint, (input->length-substrlen));
		memset(imbiss->consensus, 0, (sizeof(Uint)*(input->length-substrlen)));

		rankSufmatch(space, arr, matches, input->length-substrlen,
			maxmatches, substrlen,
			sequences, noofseqs, filter, select, handler,
			input, imbiss, scores, depictsw);

		if (gnuplot) {
			consensus (space, imbiss->consensus, input->length-substrlen,
				input, substrlen, imbiss);
		}

		time (&endsuf);
		difrank = difftime (endsuf, startsuf);

		printf ("Building  the suffixtree has taken %f seconds.\n", difsuf);
		printf ("Match the suffixtree has taken %f seconds.\n", difmatch);
		printf ("Rank  the suffixtree has taken %f seconds.\n", difrank);

		/*partial cleanup; queryurl is not released here*/
		destructSequence(space, input);
		if(subfile) {
			FREEMEMORY(space, imbiss->sub);
		}

		FREEMEMORY(space, imbiss->consensus);
		FREEMEMORY(space, imbiss->score);
		FREEMEMORY(space, matches);
		free(bin);
		free(vec);
	}

	/*final cleanup*/
	for (i=0; i < noofseqs; i++) {
		destructSequence(space, sequences[i]);
	}
	FREEMEMORY(space, sequences);
	destructSufArr(space, arr);

#ifdef MEMMAN_H
	activeblocks(space);
#endif

	printf("Goodbye.\n");
	return EXIT_SUCCESS;
}
예제 #24
0
파일: sort.c 프로젝트: spundhir/RNA-Seq
/*
 * Multikey quicksort over the indices 0..size-1.
 *
 * The elements themselves live in toSort and are never moved; the function
 * returns a newly allocated permutation of their indices.
 *
 * space     handle passed to ALLOCMEMORY
 * toSort    opaque collection handed through to cmp
 * size      number of elements
 * cmp       cmp(x, y, depth, toSort, info) compares elements x and y at key
 *           position depth; as used here, 0 means equal, 2 keeps x in the
 *           left partition and 1 keeps it in the right partition
 * sentinel  index compared against the equal partition at the current
 *           depth; an equal partition that compares 0 with it is not
 *           sorted any deeper
 * info      opaque pointer handed through to cmp
 *
 * Returns the index array (owned by the caller) or NULL if size is 0.
 */
Uint *
quickSortMultikey (void *space, void* toSort, Uint size,
                   Uint (*cmp)(Uint, Uint, Uint, void *, void*),
                   Uint sentinel, void *info)
{
  Sint a, b, c, d, v, n, r;
  TripleSint ins;
  Uint *sorted = NULL, offset;
  Uint depth = 0;
  VStack vstack;


  if (size == 0) return NULL;

  sorted = ALLOCMEMORY(space, NULL, Uint, size);
  if (size<=1) {
    sorted[0]=0;
  }

  /* start from the identity permutation */
  for (r=0; r < size; r++) sorted[r]=r;
  bl_vstackInit(&vstack, 100, sizeof(TripleSint));
  n = size;
  offset=0;

  /* each pass partitions sorted[offset .. offset+n-1] at key position depth */
  while (1) {
    /* random pivot, moved to the front of the range */
    a = rand() % n;
    SWAPUINT(sorted, offset, a+offset);
    v = sorted[offset];
    a = b = 1;
    c = d = n-1;

    /* elements equal to the pivot are parked at both ends of the range
     * ([0,a) and (d,n-1]) while b and c scan towards each other */
    for(;;) {

      while(b<=c&&((r=cmp(sorted[b+offset],v,depth,toSort,info))==2||r==0))
      {
        if (r==0) {
          SWAPUINT(sorted, a+offset, b+offset);
          a++;
        }
        b++;
      }
      while(b<=c&&((r=cmp(sorted[c+offset],v,depth,toSort,info))==1||r==0))
      {
        if (r==0) {
          SWAPUINT(sorted, c+offset, d+offset);
          d--;
        }
        c--;
      }
      if (b > c) break;
      SWAPUINT(sorted, b+offset, c+offset);
      b++;
      c--;
    }

    /* move the parked equal elements into the middle of the range */
    r = MIN(a, (b-a));
    vecswap(offset, (b-r)+offset, r, sorted);
    r = MIN((d-c), (n-d-1));
    vecswap(b+offset, (n-r)+offset, r, sorted);

    /*sort lesser*/
    r = b-a;
    if (r > 1) {
      ins.a = offset;
      ins.b = r;
      ins.c = depth;
      bl_vstackPush(&vstack, &ins);
    }
    /*sort equal: continue at the next key position*/
    if ((a+n-d-1) > 1 && cmp(sorted[r+offset], sentinel, depth, toSort, info) != 0)
    {
      ins.a = r+offset;
      ins.b = (a+n-d-1);
      ins.c = depth+1;
      bl_vstackPush(&vstack, &ins);
    }
    /*sort greater*/
    r=d-c;
    if (r > 1) {
      ins.a = (n-r)+offset;
      ins.b = r;
      ins.c = depth;
      bl_vstackPush(&vstack, &ins);
    }

    if (!bl_vstackIsEmpty(&vstack)){
      /* NOTE(review): if bl_vstackPop hands out a heap copy, that copy is
       * never released here - confirm against the VStack implementation. */
      ins = *((TripleSint *) bl_vstackPop(&vstack, NULL));
      offset = ins.a;
      /* the popped length must become the new range size; storing it in
       * the scratch variable r left n at the full array size, so later
       * partitions ran past the end of their sub-range */
      n = ins.b;
      depth = ins.c;
    } else {
      break;
    }
  }
  bl_vstackDestruct(&vstack, NULL);
  return sorted;
}
예제 #25
0
/*
 * Multikey quicksort on element indices.
 *
 * Produces a freshly allocated array holding the indices 0..size-1 in the
 * order established by cmp; the data behind toSort is left untouched.
 * cmp(x, y, depth, toSort, info) is consulted at key position depth: the
 * result 0 is treated as "equal", 2 leaves x on the left side and 1 leaves
 * it on the right side. An equal partition that compares 0 against
 * sentinel is not refined at the next key position. Pending sub-ranges are
 * kept on an explicit stack as (start, length, depth) triples.
 *
 * Returns NULL when size is 0.
 */
Uint *
quickSortMultikey (void *space, void* toSort, Uint size, 
 					Uint (*cmp)(Uint, Uint, Uint, void *, void*),
					Uint sentinel, void *info) 
{
	Sint eqlo, lo, hi, eqhi, pivot, len, verdict, cnt, idx;
	Sint lesser, equal, greater;
	Uint *sorted = NULL, base;
	Uint depth = 0;
	Stack todo;

	if (size == 0) {
		return NULL;
	}

	/* identity permutation as the starting point */
	sorted = ALLOCMEMORY(space, NULL, Uint, size);
	for (idx=0; idx < size; idx++) {
		sorted[idx]=idx;
	}

	initStack(space, &todo, 100);
	len = size;
	base = 0;

	for (;;) {
		/* choose a random pivot and bring it to the front of the range */
		eqlo = rand() % len;
		SWAPUINT(sorted, base, eqlo+base);
		pivot = sorted[base];
		eqlo = lo = 1;
		hi = eqhi = len-1;

		for (;;) {
			/* scan from the left, parking pivot-equal elements in front */
			while (lo <= hi) {
				verdict = cmp(sorted[lo+base], pivot, depth, toSort, info);
				if (verdict != 2 && verdict != 0) {
					break;
				}
				if (verdict == 0) {
					SWAPUINT(sorted, eqlo+base, lo+base);
					eqlo++;
				}
				lo++;
			}
			/* scan from the right, parking pivot-equal elements at the end */
			while (lo <= hi) {
				verdict = cmp(sorted[hi+base], pivot, depth, toSort, info);
				if (verdict != 1 && verdict != 0) {
					break;
				}
				if (verdict == 0) {
					SWAPUINT(sorted, hi+base, eqhi+base);
					eqhi--;
				}
				hi--;
			}
			if (lo > hi) {
				break;
			}
			SWAPUINT(sorted, lo+base, hi+base);
			lo++;
			hi--;
		}

		/* swap the parked equal elements into the middle of the range */
		cnt = MIN(eqlo, (lo-eqlo));
		vecswap(base, (lo-cnt)+base, cnt, sorted);
		cnt = MIN((eqhi-hi), (len-eqhi-1));
		vecswap(lo+base, (len-cnt)+base, cnt, sorted);

		lesser  = lo-eqlo;
		equal   = eqlo+len-eqhi-1;
		greater = eqhi-hi;

		/* lesser partition: same key position */
		if (lesser > 1) {
			stackpush(space, &todo, base);
			stackpush(space, &todo, lesser);
			stackpush(space, &todo, depth);
		}

		/* equal partition: next key position, unless it hits the sentinel */
		if (equal > 1 &&
			cmp(sorted[lesser+base], sentinel, depth, toSort, info) != 0) {
			stackpush(space, &todo, lesser+base);
			stackpush(space, &todo, equal);
			stackpush(space, &todo, depth+1);
		}

		/* greater partition: same key position */
		if (greater > 1) {
			stackpush(space, &todo, (len-greater)+base);
			stackpush(space, &todo, greater);
			stackpush(space, &todo, depth);
		}

		if (stackisempty(&todo)) {
			break;
		}

		/* triples come back in reverse push order */
		depth = stackpop(&todo);
		len = stackpop(&todo);
		base = stackpop(&todo);
	}

	destructStack(space, &todo);
	return sorted;
}
/*
 * Fills s->suflink_l and s->suflink_r for the suffix array s.
 *
 * Phase 1 walks all intervals reachable from [0, numofsuffixes-1] via
 * getChildintervals and files each interval [i,j] into lists[lcp], where
 * lcp = getlcpval(s, i, j). Child intervals with a == b are not descended
 * into.
 * Phase 2 visits every interval stored in lists[i] (i >= 1) whose
 * predecessor list lists[i-1] exists, obtains its link interval from
 * findslinkinterval and stores the boundaries at the position returned by
 * getfirstlindex.
 *
 * space  handle passed to the allocation helpers
 * s      suffix array; suflink_l and suflink_r are newly allocated and
 *        zero-initialised here
 */
void
constructsuflinks(void *space, Suffixarray *s) {

  Uint   i,
         j,
         a,
         b,
         k,
         nooflists,
         lcp,
         pos;
  Stack  istack;
  Stack  jstack;

  List   *children,
         **lists;
  PairUint **data,
           slinkinterval;

  /* one list slot per possible lcp value */
  nooflists = maxlcp(s) +1;
  lists = ALLOCMEMORY(space, NULL, List*, nooflists);
  memset(lists, 0, sizeof(List*)*nooflists);

  /* the two stacks hold the left and right interval boundaries in lockstep */
  initStack(space, &istack, 1000);
  initStack(space, &jstack, 1000);

  stackpush(space, &istack, 0);
  stackpush(space, &jstack, s->numofsuffixes-1);

  while(!stackisempty(&istack)) {
    i = stackpop(&istack);
    j = stackpop(&jstack);
    lcp = getlcpval(s, i, j);

    if (lists[lcp] == NULL) {
      lists[lcp] = initList(space, 10);
    }

    addinterval(space, lists[lcp], i, j);

    children = getChildintervals(space, s, i, j);
    data = (PairUint**) dataList(space, children);

    /* push the children in reverse order */
    for(k=children->length; k > 0; k--) {
      a = data[k-1]->a;
      b = data[k-1]->b;

      FREEMEMORY(space, data[k-1]);

      if(a != b) {
        stackpush(space, &istack, a);
        stackpush(space, &jstack, b);
      }
    }

    FREEMEMORY(space, data);
    wrapList(space, children, NULL);
  }

  destructStack(space, &istack);
  destructStack(space, &jstack);

  s->suflink_l = ALLOCMEMORY(space, NULL, Uint, s->numofsuffixes);
  s->suflink_r = ALLOCMEMORY(space, NULL, Uint, s->numofsuffixes);
  memset(s->suflink_l, 0, sizeof(Uint)*s->numofsuffixes);
  memset(s->suflink_r, 0, sizeof(Uint)*s->numofsuffixes);

  for(i=1; i < nooflists; i++) {
    if(lists[i] !=  NULL && lists[i-1] !=NULL) {
      for(j=0; j < lists[i]->length; j++) {
        slinkinterval = findslinkinterval(space, s, lists, i, j);
        pos = getfirstlindex(s, ((PairUint*)lists[i]->nodes[j].data)->a, ((PairUint*)lists[i]->nodes[j].data)->b);
        s->suflink_l[pos]=slinkinterval.a;
        s->suflink_r[pos]=slinkinterval.b;
      }
    }
    /* lists[i-1] is released once level i has been handled; slots that
     * never received an interval are still NULL and must be skipped */
    if (lists[i-1] != NULL) {
      wrapList(space, lists[i-1], destructinterval);
    }
  }

  /* the loop above only releases lists[i-1], so the last list would
   * otherwise be leaked */
  if (lists[nooflists-1] != NULL) {
    wrapList(space, lists[nooflists-1], destructinterval);
  }

  FREEMEMORY(space, lists);
  return;
}