/*
 * scorefilter
 *
 * Records a hit at position pos in match record m and scores it.
 *
 * The hit position is appended to both m->pos and m->org (each array is
 * grown by one element). The len symbols starting at ptr are looked up in
 * the score table of the imbissinfo passed via info; every per-symbol
 * score is added to a local sum and to m->score. m->blast keeps the larger
 * of its previous value and the sum, and the consensus counter at pos is
 * incremented by one.
 *
 * Parameters a and b are not used by this filter.
 *
 * Returns the summed score, clamped below at 0.
 */
double
scorefilter (void *space, Matchtype *m, IntSequence *a, IntSequence *b,
    Uint *ptr, Uint len, Uint pos, void *info)
{
  imbissinfo *ctx = (imbissinfo*) info;
  double total = 0;
  double symscore = 0;
  Uint k;

  /* grow both position arrays by one slot and store the new hit */
  m->count += 1;
  m->pos = ALLOCMEMORY(space, m->pos, Uint, m->count);
  m->org = ALLOCMEMORY(space, m->org, Uint, m->count);
  m->org[(m->count)-1] = pos;
  m->pos[(m->count)-1] = pos;

  /* accumulate symbol by symbol so m->score sees the same additions */
  for (k = 0; k < len; k++) {
    symscore = ctx->score[(Uint) ptr[k]];
    total += symscore;
    m->score += symscore;
  }

  if (!(m->blast > total)) {
    m->blast = total;
  }

  ctx->consensus[pos] += (Uint) 1;

  if (total > 0) {
    return total;
  }
  return 0;
}
/*
 * constructSufArr (integer sequences)
 *
 * Builds a Suffixarray over the len integer sequences in s.
 *
 * The sequences are concatenated with concatIntSequences, suffix pointers
 * are obtained with getSuffixPtr and ordered with quickSortMultikey using
 * cmpIntSequence. The resulting permutation becomes suftab; its inverse
 * is stored in inv_suftab.
 *
 * The constants 4000 and 4001 are handed to both concatIntSequences and
 * getSuffixPtr -- presumably the delimiter and sentinel symbols; confirm
 * against those helpers. The alphabet parameter is not used here.
 *
 * Returns a newly allocated Suffixarray with seq, numofsuffixes,
 * suffixptr, suftab and inv_suftab set; other fields are not initialised.
 */
Suffixarray*
constructSufArr(void *space, IntSequence **s, Uint len, FAlphabet* alphabet)
{
  MultiIntSeq *multi;
  Suffixarray *sa;
  Uint **sufptrs;
  Uint *order;
  Uint *rank;
  Uint n;
  Uint r;

  multi = concatIntSequences(space, s, len, 4000, 4001);
  n = (multi->totallength - multi->numofsequences)+1;

  rank = ALLOCMEMORY(space, NULL, Uint , n);
  sa = ALLOCMEMORY(space, NULL, Suffixarray, 1);

  sufptrs = getSuffixPtr(space, multi, 4000, 4001);
  order = quickSortMultikey (space, sufptrs, n, cmpIntSequence, n-1, NULL);

  /* invert the permutation: rank[suffix] = position in sorted order */
  r = 0;
  while (r < n) {
    rank[order[r]] = r;
    r++;
  }

  sa->inv_suftab = rank;
  sa->suftab = order;
  sa->suffixptr = sufptrs;
  sa->numofsuffixes = n;
  sa->seq = multi;

  return sa;
}
/*
 * constructchildtab
 *
 * Allocates s->chldtab (one zeroed entry per suffix) and fills it from
 * s->lcptab in two stack-driven passes:
 *
 *   pass 1 sets the .up and .down fields and mirrors them into .val,
 *          printing "up conflict" / "down conflict" when a .val slot
 *          was already non-zero;
 *   pass 2 sets .nextlIndex (and .val) for consecutive positions that
 *          carry the same lcp value.
 *
 * s->lcptab must have been built beforehand; entry 0 is read in pass 2.
 *
 * The work stack is now destructed after the second pass as well and the
 * Stack object itself is released; previously both were leaked on every
 * call.
 *
 * NOTE(review): pass 1 seeds the stack with index 1 while pass 2 seeds it
 * with index 0 -- confirm that the asymmetry is intended.
 */
void
constructchildtab(void *space, Suffixarray *s)
{
  Uint i;
  int lastIndex = -1;
  Stack *stack;

  s->chldtab = ALLOCMEMORY(space, NULL, childtab, s->numofsuffixes);
  memset(s->chldtab, 0, s->numofsuffixes*sizeof(childtab));

  stack = ALLOCMEMORY(space, NULL, Stack, 1);

  /* pass 1: construction of up and down values */
  initStack(space, stack, 100000);
  stackpush(space, stack, 1);

  for(i=1; i < s->numofsuffixes; i++) {
    while(s->lcptab[i] < s->lcptab[stacktop(stack)]) {
      lastIndex = stackpop(stack);
      if(s->lcptab[i] <= s->lcptab[stacktop(stack)] &&
          s->lcptab[stacktop(stack)] != s->lcptab[lastIndex]) {
        s->chldtab[stacktop(stack)].down = lastIndex;
        if (s->chldtab[stacktop(stack)].val != 0) {
          printf("down conflict\n");
        }
        s->chldtab[stacktop(stack)].val = lastIndex;
      }
    }
    if (lastIndex != -1) {
      s->chldtab[i].up = lastIndex;
      /* the up value is mirrored into the .val slot of the predecessor */
      if (s->chldtab[i-1].val != 0) {
        printf("up conflict\n");
      }
      s->chldtab[i-1].val = lastIndex;
      lastIndex = -1;
    }
    stackpush(space, stack, i);
  }
  destructStack(space, stack);

  /* pass 2: construction of nextlIndex value */
  initStack(space, stack, 10000);
  stackpush(space, stack, 0);

  for(i=1; i < s->numofsuffixes; i++) {
    while(s->lcptab[i] < s->lcptab[stacktop(stack)]) {
      stackpop(stack);
    }
    if (s->lcptab[i] == s->lcptab[stacktop(stack)]) {
      lastIndex = stackpop(stack);
      s->chldtab[lastIndex].nextlIndex = i;
      s->chldtab[lastIndex].val = i;
    }
    stackpush(space, stack, i);
  }

  /* release the second stack and the Stack object itself */
  destructStack(space, stack);
  FREEMEMORY(space, stack);

  return;
}
/*
 * constructLcp
 *
 * Allocates arr->lcptab (numofsuffixes entries) and fills it with the
 * length of the longest common prefix between each suffix and its
 * predecessor in sorted order.
 *
 * Suffixes are visited in text order. For suffix i with rank
 * j = inv_suftab[i] > 0 the predecessor is suftab[j-1]; the match length
 * from the previous step, minus one, is reused as the starting point, so
 * characters already known to match are not compared again. Comparison
 * is done on the raw concatenated sequence and stops at totallength.
 *
 * The first entry has no predecessor and was previously left
 * uninitialised although later stages read it; it is now set to 0.
 *
 * Requires arr->suftab, arr->inv_suftab, arr->suffixptr and arr->seq.
 */
void
constructLcp (void *space, Suffixarray *arr)
{
  Uint i, j, k;
  Uint s,t;
  int l=0;

  arr->lcptab = ALLOCMEMORY(space, NULL, Uint, arr->numofsuffixes);

  /* rank 0 has no left neighbour: define its lcp value as 0 */
  if (arr->numofsuffixes > 0) {
    arr->lcptab[0] = 0;
  }

  for(i=0; i < arr->numofsuffixes; i++) {
    j = arr->inv_suftab[i];

    if (j > 0) {
      k = arr->suftab[j-1];
      /* offsets of both suffixes within the concatenated sequence */
      s = arr->suffixptr[k]-arr->seq->sequences;
      t = arr->suffixptr[i]-arr->seq->sequences;

      /* carry over the previous match length, shortened by one */
      l=l-1;
      if (l < 0) l=0;

      while ((s+l < arr->seq->totallength)
          && (t+l < arr->seq->totallength)
          && (arr->seq->sequences[s+l] == arr->seq->sequences[t+l])) {
        l++;
      }
      arr->lcptab[j] = l;
    }
  }
  return;
}
/*
 * quickSort (VStack variant)
 *
 * Sorts indirectly: returns a newly allocated permutation of 0..size-1
 * ordered according to cmp; toSort itself is only handed through to cmp
 * together with info.
 *
 * cmp(x, y, toSort, info) is called with two indices. The left scan
 * advances while cmp returns 2 against the pivot, the right scan
 * retreats while cmp returns 1.
 *
 * Recursion is replaced by an explicit VStack of PairSint ranges; after
 * each partition step one part is pushed and the other is processed
 * immediately. Popped elements are released with free().
 */
Uint *
quickSort(void *space, void* toSort, Uint size,
    Uint (*cmp)(Uint, Uint, void *, void*), void *info)
{
  VStack pending;
  PairSint range, *top;
  Uint *perm;
  Uint pivot, tmp, n;
  int lo, hi, up, down;

  /* start from the identity permutation */
  perm = ALLOCMEMORY(space, NULL, Uint, size);
  for (n = 0; n < size; n++) {
    perm[n] = n;
  }

  range.a = 0;
  range.b = size-1;
  bl_vstackInit(&pending, 10000, sizeof(PairSint));
  bl_vstackPush(&pending, &range);

  while (!bl_vstackIsEmpty(&pending)) {
    top = (PairSint *) bl_vstackPop(&pending, NULL);
    lo = top->a;
    hi = top->b;
    free(top);

    while (lo < hi) {
      pivot = perm[(lo+hi)/2];
      up = lo;
      down = hi;

      do {
        while (cmp(perm[up], pivot, toSort, info) == 2) {
          up++;
        }
        while (cmp(perm[down], pivot, toSort, info) == 1) {
          down--;
        }
        if (up <= down) {
          tmp = perm[up];
          perm[up] = perm[down];
          perm[down] = tmp;
          up++;
          down--;
        }
      } while (down >= up);

      /* defer one part on the stack, continue with the other */
      if ((up-lo) > (hi-up)) {
        range.a = lo;
        range.b = down;
        bl_vstackPush(&pending, &range);
        lo = up;
      } else {
        range.a = up;
        range.b = hi;
        bl_vstackPush(&pending, &range);
        hi = down;
      }
    }
  }

  bl_vstackDestruct(&pending, NULL);
  return perm;
}
/*
 * constructSufArr (character sequences, alurusort variant)
 *
 * Builds a Suffixarray over the len character sequences in s.
 *
 * The sequences are concatenated with concatCharSequences, the suffix
 * order is computed by alurusort on the concatenated text, and the
 * inverse permutation is derived from it. Progress messages go to
 * stderr.
 *
 * alurusort receives the address of the suffix count and may therefore
 * change it; inv_suftab is allocated with the count computed before
 * that call, exactly as in the original order of operations.
 *
 * (char)254 and (char)127 are passed to concatCharSequences and
 * getSuffixPtr -- presumably delimiter and sentinel; confirm there.
 * An earlier variant sorted with quickSortMultikey/cmpCharSequence
 * instead of alurusort. The alphabet parameter is not used.
 *
 * Returns a newly allocated Suffixarray with seq, numofsuffixes,
 * suffixptr, suftab and inv_suftab set.
 */
Suffixarray*
constructSufArr(void *space, CharSequence **s, Uint len, FAlphabet* alphabet)
{
  MultiCharSeq *multi;
  Suffixarray *sa;
  char **sufptrs;
  Uint *order;
  Uint *rank;
  Uint n;
  Uint r;

  multi = concatCharSequences(space, s, len, (char)254, (char)127);
  n = (multi->totallength - multi->numofsequences)+1;

  fprintf(stderr, "alphabet of size (%d): %s\n", multi->mapsize, multi->map);

  rank = ALLOCMEMORY(space, NULL, Uint , n);
  sa = ALLOCMEMORY(space, NULL, Suffixarray, 1);

  fprintf(stderr, "constructing suftab.\n");
  sufptrs = getSuffixPtr(space, multi, (char)254, (char)127);
  order = alurusort(space, multi->sequences, &(n));

  fprintf(stderr, "constructing inv_suftab.\n");
  r = 0;
  while (r < n) {
    rank[order[r]] = r;
    r++;
  }

  sa->inv_suftab = rank;
  sa->suftab = order;
  sa->suffixptr = sufptrs;
  sa->numofsuffixes = n;
  sa->seq = multi;

  return sa;
}
Suffixarray * readSuffixarray(void *space, char *idxfilename, CharSequence **seqs, Uint len) { FILE *fp; char **suffixptr; Uint nmemb = 0, idvmemb = 0, llvmemb = 0, numofsuffixes, *suftab = NULL, *suflink = NULL; childtab *chldtab = NULL; unsigned char flags=0, *lcpctab = NULL; signed char *id = NULL; PairUint *llvtab = NULL; PairSint *idvtab = NULL; MultiCharSeq *mseq; Suffixarray *s; mseq = concatCharSequences(space, seqs, len, (char)254, (char)127); numofsuffixes = (mseq->totallength - mseq->numofsequences)+1; suffixptr = getSuffixPtr(space, mseq, (char)254, (char)127); fp = fopen(idxfilename, "r"); if (fp == NULL) { fprintf(stderr,"Couldn't open file '%s'. Exit forced.\n", idxfilename); exit(-1); } fread(&nmemb, sizeof(Uint), 1, fp); suftab = ALLOCMEMORY(NULL, NULL, Uint, nmemb); fread(suftab, sizeof(Uint), nmemb, fp); fread(&flags, sizeof(char), 1, fp); if (flags & LCP_TAB_STORED) { fprintf(stderr, "reading lcpc/vtab\n"); lcpctab = ALLOCMEMORY(space, NULL, unsigned char, nmemb); fread(lcpctab, sizeof(unsigned char), nmemb, fp); fread(&llvmemb, sizeof(Uint), 1, fp); llvtab = ALLOCMEMORY(space, NULL, PairUint, nmemb); fread(llvtab, sizeof(PairUint), llvmemb, fp); }
/*
 * quickSort (Stack variant)
 *
 * Sorts indirectly: returns a newly allocated permutation of 0..size-1
 * ordered according to cmp; toSort is only passed through to cmp
 * together with info.
 *
 * cmp(x, y, toSort, info) is called with two indices. The left scan
 * advances while cmp returns 2 against the pivot, the right scan
 * retreats while cmp returns 1.
 *
 * Pending ranges are kept on an explicit Stack as two consecutive
 * entries (left bound pushed first, right bound second, hence popped
 * in reverse order).
 */
Uint *
quickSort(void *space, void* toSort, Uint size,
    Uint (*cmp)(Uint, Uint, void *, void*), void *info)
{
  Stack pending;
  Stackelement lo, hi, up, down;
  Uint *perm;
  Uint pivot, tmp, n;

  /* start from the identity permutation */
  perm = ALLOCMEMORY(space, NULL, Uint, size);
  for (n = 0; n < size; n++) {
    perm[n] = n;
  }

  initStack(space, &pending, 10000);
  stackpush(space, &pending, 0);
  stackpush(space, &pending, size-1);

  while (!stackisempty(&pending)) {
    hi = stackpop(&pending);
    lo = stackpop(&pending);

    while (lo < hi) {
      pivot = perm[(lo+hi)/2];
      up = lo;
      down = hi;

      do {
        while (cmp(perm[up], pivot, toSort, info) == 2) {
          up++;
        }
        while (cmp(perm[down], pivot, toSort, info) == 1) {
          down--;
        }
        if (up <= down) {
          tmp = perm[up];
          perm[up] = perm[down];
          perm[down] = tmp;
          up++;
          down--;
        }
      } while (down >= up);

      /* defer one part on the stack, continue with the other */
      if ((up-lo) > (hi-up)) {
        stackpush(space, &pending, lo);
        stackpush(space, &pending, down);
        lo = up;
      } else {
        stackpush(space, &pending, up);
        stackpush(space, &pending, hi);
        hi = down;
      }
    }
  }

  destructStack(space, &pending);
  return perm;
}
fasta_t* initfasta(void *space) { fasta_t *f; f = ALLOCMEMORY(space, NULL, fasta_t, 1); f->seqs = NULL; f->noofseqs = 0; return f; }
/*
 * constructSufArr (character sequences, quickSortMultikey variant)
 *
 * Builds a Suffixarray over the len character sequences in s.
 *
 * The sequences are concatenated with concatCharSequences, suffix
 * pointers are obtained with getSuffixPtr and ordered with
 * quickSortMultikey using cmpCharSequence. The resulting permutation
 * becomes suftab; its inverse is stored in inv_suftab. Progress
 * messages are written to stdout.
 *
 * (char)254 and (char)127 are passed to concatCharSequences and
 * getSuffixPtr -- presumably delimiter and sentinel; confirm there.
 * The alphabet parameter is not used.
 *
 * The size report used "%d" for a size_t argument, which is undefined
 * behaviour; the arguments are now converted to the types the format
 * names and the byte count is computed in size_t.
 *
 * Returns a newly allocated Suffixarray with seq, numofsuffixes,
 * suffixptr, suftab and inv_suftab set.
 */
Suffixarray*
constructSufArr(void *space, CharSequence **s, Uint len, FAlphabet* alphabet)
{
  Uint i, numofsuffixes, *sorted, *inv_suftab;
  char **suffixptr;
  MultiCharSeq *mseq;
  Suffixarray *arr;

  mseq = concatCharSequences(space, s, len, (char)254, (char)127);
  numofsuffixes = (mseq->totallength - mseq->numofsequences)+1;

  /* suftab and inv_suftab together: two Uint per suffix */
  printf("allocating space for %lu suffixes (%zu bytes)\n",
      (unsigned long) numofsuffixes,
      (size_t) 2 * (size_t) numofsuffixes * sizeof(Uint));

  inv_suftab = ALLOCMEMORY(space, NULL, Uint , numofsuffixes);
  arr = ALLOCMEMORY(space, NULL, Suffixarray, 1);

  printf("constructing suftab.\n");
  suffixptr = getSuffixPtr(space, mseq, (char)254, (char)127);
  sorted = quickSortMultikey (space, suffixptr, numofsuffixes,
      cmpCharSequence, numofsuffixes-1, NULL);

  printf("constructing inv_suftab.\n");
  for (i=0; i < numofsuffixes; i++) {
    inv_suftab[sorted[i]]=i;
  }

  arr->seq = mseq;
  arr->numofsuffixes = numofsuffixes;
  arr->suffixptr = suffixptr;
  arr->suftab = sorted;
  arr->inv_suftab = inv_suftab;

  return arr;
}
/*
 * getimbissblast
 *
 * Derives the scoring statistics for a query against a sequence
 * database and stores them in imbiss.
 *
 * Steps, in order:
 *   - symbol frequencies of the database (dbfreq) and the query (seqfreq)
 *   - per-symbol scores from logoddscr
 *   - expected score (sum of database frequency times score) and the
 *     score of the query against itself
 *   - an index ordering of the scores (quickSort with cmp_dbl)
 *   - lambda as root of score_evd on [0,1] found by uniroot
 *   - K from relentropy; values <= 0 are replaced by 1
 *
 * imbiss->score takes ownership of the score table; H is set to 0.
 * A summary is printed to stdout. The frequency tables and the sort
 * index are released before returning.
 */
void
getimbissblast(void *space, IntSequence *query, IntSequence **seqs,
    Uint noofseqs, FAlphabet *alphabet, imbissinfo *imbiss)
{
  double *dbprob, *qryprob, *logodds;
  double expected = 0, selfscore = 0, lambda = 0, K = 0;
  Uint *order;
  Uint n;
  evdparam *evd;

  /* frequency of query and database */
  dbprob = dbfreq(space, seqs, noofseqs, alphabet, 1);
  qryprob = seqfreq(space, query, alphabet);
  logodds = logoddscr(space, dbprob, qryprob, alphabet);

  n = 0;
  while (n < alphabet->domainsize) {
    expected += dbprob[n]*logodds[n];
    n++;
  }

  n = 0;
  while (n < query->length) {
    selfscore += logodds[query->sequence[n]];
    n++;
  }

  order = quickSort(space, logodds, alphabet->domainsize, cmp_dbl, NULL);

  /* temporary parameter bundle for the root finder */
  evd = ALLOCMEMORY (space, NULL, evdparam, 1);
  evd->scores = logodds;
  evd->probs = dbprob;
  evd->noofscores = alphabet->domainsize;
  lambda = uniroot(0, 1, score_evd, 0.0000001, evd);
  FREEMEMORY(space, evd);

  K = relentropy(space, order, logodds, alphabet->domainsize, dbprob, lambda);
  if (K <= 0) {
    K = 1;
  }

  imbiss->lambda = lambda;
  imbiss->K = K;
  imbiss->H = 0;
  imbiss->score = logodds;

  printf("\nBLAST statistics:\n-------------------\n");
  printf("E(score): %f\n", expected);
  printf("inputscr: %f\n", selfscore);
  printf("lambda: %19.16e\n", lambda);
  printf("check: %19.16e\n",
      checklambda(logodds, alphabet->domainsize, dbprob, expected, lambda));
  printf("K: %19.16e\n\n", K);

  FREEMEMORY(space, order);
  FREEMEMORY(space, dbprob);
  FREEMEMORY(space, qryprob);

  return;
}
/*
 * bl_radixSortKeyFirst
 *
 * Sorts nelem records of size bytes each, in place, by the Uint key
 * stored at the beginning of every record. Uses a least-significant-
 * digit radix sort with digits of `bits` bits; each pass is a stable
 * counting sort between the caller's array and a scratch buffer.
 *
 * space   memory context handed to ALLOCMEMORY/FREEMEMORY
 * tosrt   array of records; the first sizeof(Uint) bytes of each record
 *         are the key (so size is expected to be >= sizeof(Uint))
 * size    record size in bytes
 * nelem   number of records
 * bits    digit width; 1 << bits counters are allocated.
 *         NOTE(review): bits must be smaller than the width of Uint,
 *         the shifts are undefined otherwise.
 *
 * Changes over the previous version:
 *   - empty input (nelem == 0) and size == 0 return immediately; the
 *     former computed a wrapped start pointer, the latter never left
 *     the backward loop
 *   - the backward scatter loop counts an index down instead of moving
 *     a pointer below the start of the array (undefined behaviour)
 *   - the key is read with memcpy, so records need not be Uint-aligned
 *   - failure to obtain the scratch buffer is reported instead of
 *     dereferencing NULL
 */
void
bl_radixSortKeyFirst(void *space, void *tosrt, size_t size, size_t nelem,
    Uint bits)
{
  char *p, *b, *src, *toSort;
  Uint mask, offset=0, i, key, word;
  Uint cntsize;
  Uint *cnt;
  size_t e;

  if (nelem == 0 || size == 0) {
    return;
  }

  toSort = (char*) tosrt;
  cntsize = 1 << bits;
  cnt = ALLOCMEMORY(space, NULL, Uint, cntsize);
  memset(cnt, 0, sizeof(Uint)*cntsize);

  b = src = malloc(size*nelem);
  if (src == NULL) {
    fprintf(stderr, "bl_radixSortKeyFirst: out of memory. Exit forced.\n");
    exit(EXIT_FAILURE);
  }

  mask =~ (UINT_MAX<<bits);

  /* one pass per digit; mask becomes 0 once shifted out of the key */
  for(; mask; mask <<= bits, offset+=bits) {

    /* histogram of the current digit */
    for(e=0; e < nelem; e++) {
      memcpy(&word, toSort+(e*size), sizeof(Uint));
      key = (word & mask) >> offset;
      ++cnt[key];
    }

    /* prefix sums: cnt[d] = number of records with digit <= d */
    for(i=1; i < cntsize; ++i) {
      cnt[i]+=cnt[i-1];
    }

    /* scatter back to front to keep the pass stable */
    for(e=nelem; e-- > 0; ) {
      p = toSort+(e*size);
      memcpy(&word, p, sizeof(Uint));
      key = (word & mask) >> offset;
      memmove(b+((cnt[key]-1)*size), p, size);
      --cnt[key];
    }

    /* swap roles of source and destination */
    p=b; b=toSort; toSort=p;
    memset(cnt, 0, sizeof(Uint)*cntsize);
  }

  /* odd number of passes: result sits in the scratch buffer */
  if(toSort == src) memcpy(b, toSort, size*nelem);

  FREEMEMORY(space, src);
  FREEMEMORY(space, cnt);
  return;
}
/*
 * decode_2tupel_cantor
 *
 * Splits the code i into a pair (x, y) and returns it as a newly
 * allocated two-element vector, x first.
 *
 * With j = q_cantor(i), the second component is y = i - f_cantor(j)
 * and the first is x = j - y.
 */
vector_t *
decode_2tupel_cantor(void *space, Uint i)
{
  vector_t *pair = NULL;
  Uint diag;
  Uint first;
  Uint second;

  pair = ALLOCMEMORY(space, pair, vector_t, 1);
  INITVECTOR(pair);

  diag = q_cantor(i);
  second = i-f_cantor(diag);
  first = diag-second;

  APPENDVEC(space, pair, first);
  APPENDVEC(space, pair, second);

  return pair;
}
/*
 * addinterval
 *
 * Appends the interval [a, b] to list. A PairUint holding both bounds
 * is allocated and inserted after the current last node; for an empty
 * list it is inserted after LISTNILVALUE instead.
 */
void
addinterval(void *space, List *list, Uint a, Uint b)
{
  PairUint *iv = ALLOCMEMORY(space, NULL, PairUint, 1);

  iv->b = b;
  iv->a = a;

  if (list->length != 0) {
    insertAfter(space, list, list->lastNode, iv);
    return;
  }

  insertAfter(space, list, LISTNILVALUE, iv);
  return;
}
/*
 * bl_radixSortUint
 *
 * Sorts the nelem values in toSort in place, ascending, using a
 * least-significant-digit radix sort with digits of `bits` bits. Each
 * pass is a stable counting sort between the caller's array and a
 * scratch buffer.
 *
 * space   memory context handed to ALLOCMEMORY/FREEMEMORY
 * toSort  array of values to sort
 * nelem   number of values
 * bits    digit width; 1 << bits counters are allocated.
 *         NOTE(review): bits must be smaller than the width of Uint,
 *         the shifts are undefined otherwise.
 *
 * Changes over the previous version:
 *   - empty input returns immediately (the start of the backward loop
 *     was computed as toSort + (nelem-1), which wraps for nelem == 0)
 *   - the backward scatter loop counts an index down instead of
 *     decrementing a pointer past the start of the array (undefined
 *     behaviour)
 *   - failure to obtain the scratch buffer is reported instead of
 *     dereferencing NULL
 */
void
bl_radixSortUint(void *space, Uint *toSort, size_t nelem, Uint bits)
{
  Uint *p, *b, *src;
  Uint mask, offset=0, i, key;
  Uint cntsize;
  Uint *cnt;
  size_t e;

  if (nelem == 0) {
    return;
  }

  cntsize = 1 << bits;
  cnt = ALLOCMEMORY(space, NULL, Uint, cntsize);
  memset(cnt, 0, sizeof(Uint)*cntsize);

  b = src = malloc(sizeof(Uint)*nelem);
  if (src == NULL) {
    fprintf(stderr, "bl_radixSortUint: out of memory. Exit forced.\n");
    exit(EXIT_FAILURE);
  }

  mask =~ (UINT_MAX<<bits);

  /* one pass per digit; mask becomes 0 once shifted out of the value */
  for(; mask; mask <<= bits, offset+=bits) {

    /* histogram of the current digit */
    for(e=0; e < nelem; e++) {
      key = (toSort[e] & mask) >> offset;
      ++cnt[key];
    }

    /* prefix sums: cnt[d] = number of values with digit <= d */
    for(i=1; i < cntsize; ++i) {
      cnt[i]+=cnt[i-1];
    }

    /* scatter back to front to keep the pass stable */
    for(e=nelem; e-- > 0; ) {
      key = (toSort[e] & mask) >> offset;
      b[cnt[key]-1] = toSort[e];
      --cnt[key];
    }

    /* swap roles of source and destination */
    p=b; b=toSort; toSort=p;
    memset(cnt, 0, sizeof(Uint)*cntsize);
  }

  /* odd number of passes: result sits in the scratch buffer */
  if(toSort == src) memcpy(b, toSort, sizeof(Uint)*nelem);

  FREEMEMORY(space, src);
  FREEMEMORY(space, cnt);
  return;
}
/*
 * decodeCantor
 *
 * Decodes code into a vector of n components by repeatedly splitting
 * it with decode_2tupel_cantor: each step appends the first component
 * of the pair and continues with the second; the value left after
 * n-1 splits is the last component.
 *
 * space  memory context
 * code   value to decode
 * n      number of components to produce
 *
 * Returns a newly allocated vector with n elements (empty for n == 0,
 * the code itself for n == 1).
 *
 * The previous version appended VECTOR(r,1) after r had already been
 * released, dereferenced a NULL r for n == 1 and wrapped the loop
 * bound for n == 0. The remaining code is kept in `code`, so it is
 * appended directly.
 *
 * NOTE(review): only the vector_t object of each intermediate pair is
 * freed here; whether its element storage needs a separate release
 * depends on INITVECTOR/APPENDVEC -- confirm.
 */
vector_t *
decodeCantor(void *space, Uint code, Uint n)
{
  Uint i;
  vector_t *v = NULL;
  vector_t *r = NULL;

  v=ALLOCMEMORY(space, v, vector_t, 1);
  INITVECTOR(v);

  if (n == 0) {
    return (v);
  }

  /* i+1 < n instead of i < n-1: no unsigned wrap-around */
  for (i=0; i+1 < n; i++) {
    r = decode_2tupel_cantor(space, code);
    APPENDVEC(space, v, VECTOR(r,0));
    code = VECTOR(r,1);
    FREEMEMORY(space, r);
  }

  /* the remainder is the last component */
  APPENDVEC(space, v, code);

  return (v);
}
/*
 * sufSubstring
 *
 * Looks up windows of length sublen of pattern in the suffix array.
 *
 * For every start offset 0 .. len-sublen-1 the window beginning there
 * is searched with mmsearch over the whole array (0 .. numofsuffixes-1)
 * and the returned pair is stored at the same index of the result.
 *
 * Returns a newly allocated array of len-sublen pairs, or NULL when
 * len <= sublen.
 */
PairSint*
sufSubstring (void *space, Suffixarray *arr, Uint *pattern, Uint len,
    Uint sublen)
{
  PairSint *hits;
  PairSint cur;
  Uint nwin;
  Uint w;

  if (len <= sublen) {
    return NULL;
  }

  nwin = len-sublen;
  hits = ALLOCMEMORY(space, NULL, PairSint, nwin);

  w = 0;
  while (w < nwin) {
    cur = mmsearch(arr, pattern + w, sublen, 0, 0, arr->numofsuffixes-1);
    hits[w].a = cur.a;
    hits[w].b = cur.b;
    w++;
  }

  return hits;
}
if(!silent) MSG("reading uint id.\n"); fread(idvutab, sizeof(PairUint), idvmemb, fp); for(idvi=0; idvi < idvmemb; idvi++) { idvtab[idvi].a = idvutab[idvi].a; idvtab[idvi].b = idvutab[idvi].b; } free(idvutab); } } if ((flags & MD5_STORED)) { mdfive = ALLOCMEMORY(space, NULL, unsigned char, 16); fread(mdfive, sizeof(unsigned char), 16, fp); } s = ALLOCMEMORY(space, NULL, Suffixarray, 1); if ((flags & LINT_SUFLINKS)) s->llint = 1; else s->llint=0; s->suftab = suftab; s->seq = mseq; s->numofsuffixes = numofsuffixes; s->lcpctab = lcpctab; s->llvtab = llvtab; s->llvcnt = llvmemb; s->inv_suftab=NULL; s->chldtab = chldtab; #ifdef SUFLINK_MMAP s->suflink = (Uint*) &suflinkptr[pagediff_sl]; s->id = &id[pagediff_id];
int main(int argc, char** argv) { char* content; Uint contentlen, i, j, k, l, id, lines=0; stringset_t *set, *set2, **csv, *que; CharSequence **s; Suffixarray *sarray; MultiCharSeq *mseq; PairSint d, *matches = NULL; Uint totallength = 0; Uint wsize=10; Uint counter=0; Uint all=0; int *space = NULL; char *pattern= "GGAAGAAAGCGTGGGGTTTG"; char *pattern2= "TGATTAGTGATTAGTGATTA"; char *pattern3= "ACAAACATAT"; char *start; time_t startsuf, endsuf; double difsuf; Uint noofchildren; List *list; PairUint **childinterval; gnuplot_ctrl *h; double *genome; //set = readfasta(&space, "HP26695.fasta"); //csv = readcsv(&space, "HP12_GCTC.inserts", "", &lines); /*s = ALLOCMEMORY(&space, NULL, CharSequence *, set->noofstrings); for(i=0; i < set->noofstrings/2; i++) { totallength += set->strings[i].len; s[i] = ALLOCMEMORY(&space, NULL, CharSequence, 1); s[i]->sequence = set->strings[i].str; s[i]->length = set->strings[i].len; / printf("%s,", set->strings[i].str); printf("\n"); / }*/ s = ALLOCMEMORY(&space, NULL, CharSequence *, 1); s[0] = ALLOCMEMORY(&space, NULL, CharSequence, 1); s[0]->sequence = pattern3; s[0]->length = strlen(pattern3); genome = ALLOCMEMORY(&space, NULL, double, totallength); memset(genome, 0, sizeof(double)*totallength); time (&startsuf); sarray = constructSufArr(&space, s, 1/*set->noofstrings/2*/, NULL); constructLcp(space, sarray); dumplcptab(sarray); constructchildtab(space, sarray); time (&endsuf); difsuf = difftime (endsuf, startsuf); printf("noofsuffixes: %d\n", sarray->numofsuffixes); dumpchildtab(sarray); dumpSufArr(sarray); list = getChildintervals(space, sarray, 0, 5); childinterval = (PairUint**) dataList(space, list); for(i=0; i < list->length; i++) { printf("[%d,%d]\n", childinterval[i]->a, childinterval[i]->b); } constructsuflinks(space, sarray); for(k=1; k < lines; k+=2) { // printf("searching %s\n", csv[k]->strings[0].str); if(csv[k]->strings[0].len > 8) { if(wsize > csv[k]->strings[0].len) { d=mmsearch(sarray, csv[k]->strings[0].str, 
csv[k]->strings[0].len, 0, 0, sarray->numofsuffixes-1); // printf("suffixes were found at positions (%d, %d)\n",d.a, d.b); for (j=d.a; j <= d.b; j++) { genome[sarray->suftab[j]]++; } } else { for(l=0; l < csv[k]->strings[0].len-wsize; l++) { d=mmsearch(sarray, &csv[k]->strings[0].str[l], wsize, 0, 0, sarray->numofsuffixes-1); // printf("suffixes were found at positions (%d, %d)\n",d.a, d.b); for (j=d.a; j <= d.b; j++) { genome[sarray->suftab[j]]++; /* start = sarray->suffixptr[sarray->suftab[j]]; printf("pattern was: %s\n", &csv[k]->strings[0].str[l]); printf("suffix found: "); for (i=0; i < wsize; i++) { printf("%c", start[i]); } printf("\n"); id = getMultiCharSeqIndex(sarray->seq, sarray->suffixptr[sarray->suftab[j]]); printf("found in sequence: %d\n", id); */ } } } if (d.a < d.b) counter++; all++;} } destructStringset(&space, set); writeY("out.xy", genome, totallength); /*h = gnuplot_init(); gnuplot_setstyle(h, "points"); gnuplot_cmd(h, "set title 'IMBISS - seed statistics' -28,0 font'Helvetica,15'"); gnuplot_cmd(h, "set label 'seed length: %d' at graph 0.05,0.95 font 'Helvetica, 12'", totallength); gnuplot_set_xlabel(h, "matches"); gnuplot_set_ylabel(h, "position"); gnuplot_plot_x(h, genome, totallength, "position"); */ printf ("sliding windows of %d sequences (of %d) found\n", counter, all); printf ("Building the suffixarray has taken %f seconds.\n", difsuf); printf ("Total length of suffixarray was %d\n", totallength); while(1); return EXIT_SUCCESS; }
Suffixarray * readSuffixarray(void *space, char *idxfilename, CharSequence **seqs, Uint len, unsigned char silent) { FILE *fp; Uint nmemb = 0, idvmemb = 0, llvmemb = 0, numofsuffixes, *suftab = NULL, idvi =0; childtab *chldtab = NULL; unsigned char flags=0, *lcpctab = NULL; unsigned char *mdfive=NULL, *check=NULL; PairUint *llvtab = NULL; PairLSint *idvtab = NULL; PairSint *idvutab = NULL; MultiCharSeq *mseq; Suffixarray *s; #ifdef SUFLINK_MMAP int fd; signed char *id = NULL; long curiopos, offset; struct stat sb; char *suflinkptr; int pagediff_id; int pagediff_sl; #elif SUFLINK_DISKACC int fd; off_t off_sl; off_t off_id; #else signed char *id = NULL; Uint *suflink = NULL; #endif mseq = concatCharSequences(space, seqs, len, (char)126, (char)127); numofsuffixes = mseq->totallength; fp = fopen(idxfilename, "r"); if (fp == NULL) { DBG("Couldn't open file '%s'. Exit forced.\n", idxfilename); exit(-1); } fread(&nmemb, sizeof(Uint), 1, fp); suftab = ALLOCMEMORY(NULL, NULL, Uint, nmemb); fread(suftab, sizeof(Uint), nmemb, fp); fread(&flags, sizeof(char), 1, fp); if (flags & LCP_TAB_STORED) { if (!silent) MSG("reading lcpc/vtab.\n"); lcpctab = ALLOCMEMORY(space, NULL, unsigned char, nmemb); fread(lcpctab, sizeof(unsigned char), nmemb, fp); fread(&llvmemb, sizeof(Uint), 1, fp); llvtab = ALLOCMEMORY(space, NULL, PairUint, nmemb); fread(llvtab, sizeof(PairUint), llvmemb, fp); } if (flags & CHLD_TAB_STORED) { if(!silent) MSG("reading childtab.\n"); chldtab = ALLOCMEMORY(space, NULL, childtab, nmemb); fread(chldtab, sizeof(childtab), nmemb, fp); } if ((flags & SUFLINK_TAB_STORED)) { if(!silent) MSG("reading suflinks.\n"); #ifdef SUFLINK_MMAP curiopos = ftell(fp); fd = open(idxfilename, O_RDONLY); if (fd == -1) { perror("open"); exit(EXIT_FAILURE); } if (fstat(fd, &sb) == -1) { perror("fstat"); exit(EXIT_FAILURE); } offset = curiopos & ~(sysconf(_SC_PAGE_SIZE) - 1); if (curiopos >= sb.st_size) { fprintf(stderr, "offset is past end of file\n"); exit(EXIT_FAILURE); } pagediff_sl = 
curiopos - offset; suflinkptr = mmap(0, nmemb*sizeof(Uint) + pagediff_sl, PROT_READ, MAP_SHARED, fd, offset); if (suflinkptr == MAP_FAILED) { perror("mmap"); exit(EXIT_FAILURE); } #elif SUFLINK_DISKACC sl_diskacc = 1; off_sl = ftell(fp); fd = open(idxfilename, O_RDONLY); #else suflink = ALLOCMEMORY(space, NULL, Uint, nmemb); fread(suflink, sizeof(Uint), nmemb, fp); #endif #ifdef SUFLINK_MMAP offset = (curiopos+(nmemb*sizeof(Uint))) & ~(sysconf(_SC_PAGE_SIZE) - 1); if (curiopos >= sb.st_size) { fprintf(stderr, "offset is past end of file\n"); exit(EXIT_FAILURE); } pagediff_id = (curiopos+(nmemb*sizeof(Uint))) - offset; id = mmap(0, nmemb*sizeof(signed char) + pagediff_id, PROT_READ, MAP_SHARED, fd, offset); if (id == MAP_FAILED) { perror("mmap"); exit(EXIT_FAILURE); } fseek(fp, nmemb*(sizeof(Uint)+sizeof(signed char)), SEEK_CUR); #elif SUFLINK_DISKACC off_id = off_sl+(nmemb*sizeof(Uint)); fseek(fp, nmemb*(sizeof(Uint)+sizeof(signed char)), SEEK_CUR); #else id = ALLOCMEMORY(space, NULL, signed char, nmemb); fread(id, sizeof(signed char), nmemb, fp); #endif fread(&idvmemb, sizeof(Uint), 1, fp); idvtab = ALLOCMEMORY(space, NULL, PairLSint, idvmemb); if ((flags & LINT_SUFLINKS)) { if(!silent) MSG("reading lsint id.\n"); fread(idvtab, sizeof(PairLSint), idvmemb, fp); } else { idvutab = ALLOCMEMORY(space, NULL, PairSint, idvmemb); if(!silent) MSG("reading uint id.\n"); fread(idvutab, sizeof(PairUint), idvmemb, fp); for(idvi=0; idvi < idvmemb; idvi++) { idvtab[idvi].a = idvutab[idvi].a; idvtab[idvi].b = idvutab[idvi].b; } free(idvutab); } } if ((flags & MD5_STORED)) { mdfive = ALLOCMEMORY(space, NULL, unsigned char, 16); fread(mdfive, sizeof(unsigned char), 16, fp); }
/*
 * matchkdseed
 *
 * Matches every read in reads against the suffix array s, on both
 * strands, and reports the results.
 *
 * For each read that is at least minsize long:
 *   1. seeds are computed with kdseeds for the read itself,
 *   2. unless matchingstat is set, the seeds are aligned with
 *      alignkdmatches and stored in the read record as PLUSSTRAND
 *      matches; with matchingstat set the seeds are dumped instead,
 *   3. the same is repeated for the sequence returned by
 *      charDNAcomplement (MINUSSTRAND),
 *   4. the read is handed to reportMatch, which writes to dev.
 * Reads without any match on either strand (including reads shorter
 * than minsize) are written to nomatchdev when it is non-NULL.
 *
 * hitstrategy: 2 enables both beststrand and best, 1 only beststrand.
 * counter:     when NULL a progress bar is driven from here; otherwise
 *              *counter is incremented per read and the file-level
 *              mutexes mutex1/mutex2 are used for reporting.
 * outfile is not referenced in this function.
 *
 * NOTE(review): the argument of both ceil() calls is an integer
 * division, so ceil() has no effect -- confirm whether a real-valued
 * division was intended.
 */
void
matchkdseed( void *space, Suffixarray *s, fasta_t *reads, Uint minsize, char *outfile, Uint *counter, unsigned char silent, Uint s_ext, Uint p_mis, Uint Xoff, Uint k_p, Uint rep_type, Uint hitstrategy, Uint bedist, unsigned char showalignment, double maxevalue, int acc, Uint M, unsigned char matchingstat, FILE *dev, FILE *nomatchdev) {
  double H, K, lambda;
  char *buffer, *curseq;
  Uint k, curlen, dim, wordno;
  bitvector *D, *Mv;
  Gmap map;
  gread_t read;
  gmatch_t *mmatches=NULL, *pmatches=NULL;
  matchstem_t *V;
  int plusdiff, minusdiff, noofmatches=0;
  pthread_mutex_t *mtx=NULL;
  pthread_mutex_t *mtx2=NULL;
  Uint *enctab, i,j, nmmatch, npmatch;
  int bmscr, bpscr;
  unsigned char uninformative = 0, beststrand = 0, best = 0;

  if (hitstrategy == 2) {
    beststrand = 1;
    best = 1;
  } else if (hitstrategy == 1) {
    beststrand = 1;
  }

  /* build the alignment matrix: dim is derived from the longest read and
   * the accuracy acc; D and Mv are two halves of one allocation */
  enctab = encodetab(s->seq->map, s->seq->mapsize);
  dim = reads->maxlen + 2*((reads->maxlen-ceil((acc*reads->maxlen)/100))+4);
  wordno = reads->maxlen/BITVECTOR_WORDSIZE;
  /* disabled: wordno += ((reads->maxlen & (BITVECTOR_WORDSIZE-1)) > 0) ? 1 : 0; */
  wordno++;
  D = ALLOCMEMORY(space, NULL, bitvector, 2*(dim+1));
  Mv = &D[dim+1];
  for(i=0; i <= dim; i++) {
    D[i] = initbitvector(space, wordno*BITVECTOR_WORDSIZE);
    Mv[i] = initbitvector(space, wordno*BITVECTOR_WORDSIZE);
  }

  /* without an external counter the progress bar is driven from here;
   * otherwise the file-level mutexes are used for output */
  if (counter == NULL) {
    initProgressBarVT();
  } else {
    mtx = &mutex1;
    mtx2 = &mutex2;
  }

  karlinunitcostpp(space, &lambda, &H, &K);

  for (k=0; k < reads->noofseqs; k++) {
    plusdiff = 0;
    minusdiff = 0;
    noofmatches = 0;

    if (!silent) {
      if (counter == NULL) {
        progressBarVT("reads matched.", reads->noofseqs, k, 25);
      } else {
        (*counter)++;
      }
    }

    curseq = reads->seqs[k]->sequence;
    curlen = reads->seqs[k]->length;
    /* reset per read; also makes too-short reads count as unmatched */
    npmatch = 0;
    nmmatch = 0;

    if(curlen >= minsize) {
      initGmap(&map, s->seq, 1);
      initRead(&read, reads->seqs[k]);
      bpscr = 0;
      bmscr = 0;

      /* search the read as given */
      V=kdseeds(space, s, curseq, curlen, s_ext, p_mis, Xoff, k_p);
#ifdef KDUNINFORMATIVE
      /* first seed interval wider than M: flag the read, skip alignment */
      if(V[0].branches[0].r > V[0].branches[0].l && V[0].branches[0].r-V[0].branches[0].l > M) {
        uninformative = 1;
        plusdiff = V[0].branches[0].r - V[0].branches[0].l;
      } else
#endif
      if(!matchingstat) {
        uninformative = 0;
        pmatches = alignkdmatches(space, s, V, reads->seqs[k], curseq, curlen, M, enctab, beststrand, bedist, lambda, H, K, maxevalue, acc, D, dim, &npmatch, &bpscr);
        if(npmatch > 0) {
          setMatches(&read, pmatches, npmatch, PLUSSTRAND);
        }
      } else {
        printf("#%d %s\n",curlen, reads->seqs[k]->description);
        dumpkdseeds(s, V, curlen, '+', M);
      }

      /* release the seeds of this strand */
      for(j=0; j < curlen; j++) {
        if (V[j].noofbranches > 0) {
          FREEMEMORY(space, V[j].branches);
        }
      }
      FREEMEMORY(space, V);

      /* search the complement */
      buffer = charDNAcomplement(space, curseq, curlen);
      V=kdseeds(space, s, buffer, curlen, s_ext, p_mis, Xoff, k_p);
#ifdef KDUNINFORMATIVE
      if(uninformative && V[0].branches[0].r > V[0].branches[0].l && V[0].branches[0].r-V[0].branches[0].l > M) {
        minusdiff = V[0].branches[0].r - V[0].branches[0].l;
        uninformative = 1;
      } else
#endif
      if(!matchingstat) {
        uninformative = 0;
        mmatches = alignkdmatches(space, s, V, reads->seqs[k], buffer, curlen, M, enctab, beststrand, bedist, lambda, H, K, maxevalue, acc, D, dim, &nmmatch, &bmscr);
        if(nmmatch > 0) {
          setMatches(&read, mmatches, nmmatch, MINUSSTRAND);
        }

        /* best: one common threshold (the smaller score) for both strands;
         * neither best nor beststrand: threshold derived from acc alone */
        if (best) {
          bpscr = MIN(bmscr, bpscr);
          bmscr = bpscr;
        } else if (!beststrand) {
          bpscr = bmscr = curlen-ceil((acc*curlen)/100);
        }

        setReads(&map, &read, 1);
        reportMatch(dev, &map, rep_type, showalignment, mtx, bpscr, bmscr);
        FREEMEMORY(space, pmatches);
        FREEMEMORY(space, mmatches);
        pmatches = NULL;
        mmatches = NULL;
      } else {
        dumpkdseeds(s, V, curlen, '-', M);
      }

      /* release the seeds of the complement strand */
      for(j=0; j < curlen; j++) {
        if (V[j].noofbranches > 0) {
          FREEMEMORY(space, V[j].branches);
        }
      }
      FREEMEMORY(space, V);
      FREEMEMORY(space, buffer);
    }

    /* reads without any match go to the no-match stream; the lock is
     * only taken when a mutex was set up above */
    if(nomatchdev && nmmatch == 0 && npmatch == 0) {
      if (mtx2 != NULL) pthread_mutex_lock(mtx2);
      fprintf(nomatchdev, "%s\n%s\n", reads->seqs[k]->description, reads->seqs[k]->sequence);
      fflush(nomatchdev);
      if (mtx2 != NULL) pthread_mutex_unlock(mtx2);
    }
  }

  wrapBitmatrix(space, D, 2*(dim+1));
  FREEMEMORY(space, D);
  FREEMEMORY(space, enctab);
  return;
}
gmatch_t* alignkdmatches(void *space, Suffixarray *s, matchstem_t *M, CharSequence *query, char *curseq, Uint m, Uint t, Uint *enctab, unsigned char bestonly, Uint bedist, double lambda, double H, double K, double maxevalue, int acc, bitvector* D, Uint dim, Uint* nmatch, int* bscr) { Uint k,j,l,r,q,noofmatches=0, mat, mis, ins, del; Lint pos, margin, schr, echr, sstart, slen, i ; char *sseq; unsigned int idx; Alignment *al=NULL; bitvector *peq; PairSint result; #ifdef ALIGNDBG PairSint result2; CharSequence *checkseq; #endif double E; int scr; int maxedist = 0; int bestscr = 0; gmatch_t *matches=NULL; margin = bestscr = maxedist = m-ceil((acc*m)/100); peq = getpeq(NULL, curseq, m, s->seq->map, s->seq->mapsize, enctab); for(i=0; i < m; i++) { for(q=0; q < M[i].noofbranches; q++) { l = M[i].branches[q].l; r = M[i].branches[q].r; scr = M[i].branches[q].mat - (M[i].branches[q].mis+M[i].branches[q].ins+M[i].branches[q].del); E = evalue(lambda, K, spacemult(m, s->numofsuffixes, H, K), scr); if(l <= r && E <= maxevalue && (r-l) <= t) { for(j=l; j <= r; j++) { pos = s->suftab[j]; /*skip marginal matches*/ for(k=0; k < noofmatches; k++) if (abs((signed int)matches[k].p-(pos-(i+margin))) <= margin) break; if (k == noofmatches) { idx = getMultiCharSeqIndex(s->seq, &s->seq->sequences[pos]); schr = (idx > 0) ? s->seq->markpos[idx-1]+1 : 0; echr = s->seq->markpos[idx]; assert(echr >= pos); sstart = MAX(schr, pos-(i+margin)); slen = (echr > sstart+m+2*(margin+1)) ? 
m+2*(margin+1) : (echr-sstart)+1; sseq = &s->seq->sequences[sstart]; /* if(echr == pos) { fprintf(stderr, "\n\n sstart:%lld, slen:%lld, ssend:%lld, echr:%lld\n\n", sstart, slen, sstart+slen-1, echr); } */ myersbitmatrix(NULL, curseq, m, sseq, slen, s->seq->map, s->seq->mapsize, enctab, m-bestscr, peq, &result, D, slen); #ifdef ALIGNDBG result2 = myersbitvector(NULL, curseq, m, sseq, slen, s->seq->map, s->seq->mapsize, enctab, m-bestscr, peq); assert(result.a == result2.a && result.b == result2.b); #endif if (result.a != -1 && result.b <= maxedist && result.b <= bestscr && result.a < slen) { al = ALLOCMEMORY(space, NULL, Alignment, 1); initAlignment(al, curseq, m, 0, sseq, slen, 0); bitvectorbacktrack(al, D, slen, m, result.a); #ifdef ALIGNDBG assert(getEdist(al) == result.b); checkseq = (CharSequence*) s->seq->ref[idx].ref; assert(strncmp(& checkseq->sequence[pos-schr], &s->seq->sequences[pos], slen) == 0); #endif countEops(al, &mat, &mis, &del, &ins); /*skip identical matches*/ for(k=0; k < noofmatches; k++) { if (matches[k].p == sstart+al->voff) break; } if (k == noofmatches) { matches=realloc(matches, sizeof(gmatch_t)*(noofmatches+1)); matches[noofmatches].p = sstart+al->voff; matches[noofmatches].q = sstart+result.a-1; matches[noofmatches].edist = result.b; matches[noofmatches].i = i; matches[noofmatches].j = i+M[i].branches[q].mat+ M[i].branches[q].mis+M[i].branches[q].ins-1; matches[noofmatches].scr = scr; matches[noofmatches].evalue = E; matches[noofmatches].mat = mat; matches[noofmatches].mis = mis; matches[noofmatches].ins = ins; matches[noofmatches].del = del; matches[noofmatches].subject = idx; matches[noofmatches].checklen = matches[noofmatches].j; matches[noofmatches].al = al; noofmatches++; if(bestonly) { bestscr = MIN(maxedist, (result.b+bedist)); } } else { wrapAlignment(al); FREEMEMORY(space, al); } } } } } } } for(i=0; i < s->seq->mapsize; i++) { FREEMEMORY(space, peq[i]); } FREEMEMORY(space, peq); (*bscr) = bestscr; (*nmatch) = noofmatches; 
return matches; }
int main (int argc, char** argv) { Sint optindex, c; unsigned char depictsw=0; unsigned char wurst=1; unsigned char gnuplot=0; Uint i, j, noofseqs=0, nooffreqs=0, noofqueries=0; Uint noofhits=100; Uint substrlen = 10; Uint minseeds = 5; Uint maxmatches = 10000; char *seq, *vec, *bin; imbissinfo *imbiss; void *space = NULL; double *scores = NULL; int swscores[2]={3,-2}; char *pveclistfile=NULL; char *alphabetfile=NULL; char *inputfile=NULL; char *batchfile = NULL; char *subfile=NULL; char *reportfile = NULL; int (*handler) (void *, Matchtype *, IntSequence **, Uint, Uint, void *) = allscores; double (*filter) (void *, Matchtype *, IntSequence *, IntSequence *, Uint *, Uint, Uint, void *) = swconstfilter; Matchtype* (*select) (void *, Matchtype *, Uint k, IntSequence *, IntSequence **, void *) = selectSW; stringset_t **fn, **freq, *queryurl, **queries=NULL; Suffixarray *arr = NULL; IntSequence **sequences = NULL; IntSequence *input = NULL; FAlphabet *alphabet = NULL; PairSint *matches=NULL; Uint percent=0; time_t startsuf, endsuf; double difsuf, difmatch, difrank; #ifdef MEMMAN_H Spacetable spacetab; initmemoryblocks(&spacetab, 100000); space = &spacetab; #endif while(1) { c=getopt_long(argc, argv, "SAghFGBLM:D:r:m:x:n:p:b:s:a:q:l:c:dvw", long_options, &optindex); if (c==-1) break; switch(c) { case 'r': reportfile=optarg; break; case 'v': verbose_flag=1; break; case 'd': depictsw = 1; break; case 's': pveclistfile = optarg; break; case 'a': alphabetfile = optarg; break; case 'q': inputfile = optarg; noofqueries = 1; break; case 'l': substrlen = atoi(optarg); break; case 'c': minseeds = atoi(optarg); break; case 'b': batchfile = optarg; break; case 'p': percent = atoi(optarg); break; case 'x': subfile = optarg; break; case 'n': noofhits = atoi(optarg); break; case 'w': wurst = 0; break; case 'B': filter = scorefilter; select = selectBlastScore; break; case 'S': filter = scorefilter; select = selectScore; break; case 'A': filter = swconstfilter; select = selectSW; 
break; case 'F': filter = scorefilter; select = selectScoreSWconst; break; case 'G': filter = scorefilter; select = selectBlastScoreSWconst; break; case 'M': swscores[0]=atoi(optarg); break; case 'L': handler = latexscores; break; case 'D': swscores[1]=atoi(optarg); break; case 'g': gnuplot = 1; break; case 'm': maxmatches=atoi(optarg); break; case 'h': default: usage(argv[0]); exit (EXIT_FAILURE); } } if (pveclistfile==NULL || (inputfile == NULL && batchfile==NULL) || alphabetfile == NULL) { usage(argv[0]); exit (EXIT_FAILURE); } imbiss = ALLOCMEMORY(space, NULL, imbissinfo, 1); imbiss->reportfile = reportfile; imbiss->swscores = swscores; imbiss->noofhits = noofhits; imbiss->minseeds = minseeds; imbiss->wurst = wurst; /*read batch file*/ if (batchfile) { queries = readcsv(space, batchfile, "", &noofqueries); } /*read substitution matrix*/ if (subfile) { freq=readcsv(space, subfile,",", &nooffreqs); scores = ALLOCMEMORY(space, NULL, double, ((nooffreqs-1)*(nooffreqs-1)) ); for(i=1; i < nooffreqs; i++) { for(j=1; j < nooffreqs; j++) { if(strcmp(SETSTR(freq[i],j),"inf")==0){ MATRIX2D(scores, nooffreqs-1, i, j)=0; }else{ MATRIX2D(scores, nooffreqs-1, i, j)=atof(SETSTR(freq[i],j)); } } } } /*read alphabet*/ if (alphabetfile != NULL) { alphabet = loadCSValphabet(space, alphabetfile); sortMapdomain(space, alphabet); } /*load sequence database*/ fn=readcsv(space, pveclistfile, "", &noofseqs); sequences = ALLOCMEMORY(space, NULL, IntSequence *, noofseqs); for(i=0; i < noofseqs; i++) { sequences[i] = loadSequence(space, SETSTR(fn[i],0)); } for (i=0; i < noofseqs; i++) { destructStringset(space, fn[i]); } FREEMEMORY(space, fn); /*construct the suffix array*/ time (&startsuf); arr = constructSufArr(space, sequences, noofseqs, NULL); constructLcp(space, arr); time (&endsuf); difsuf = difftime (endsuf, startsuf); /*do search*/ for (i=0; i < noofqueries; i++) { /*get query form batchfile*/ if (queries) { inputfile = SETSTR(queries[i],0); } /*typically only used with batchfile*/ 
if (percent != 0) { substrlen = ((double)((double)input->length/100)*(double) percent); } input = loadSequence(space, inputfile); //seq = printSequence (space, input, 60); printf(">IMBISS order delivered\n"); //printf("%s\n",seq); printf("%s\n", input->url); //FREEMEMORY(space, seq); time (&startsuf); matches=sufSubstring(space, arr, input->sequence, input->length, substrlen); time (&endsuf); difmatch = difftime (endsuf, startsuf); /*get prob vector url for salami/wurst*/ //printf("%.*s\n", 5, input->url + 58); vec = malloc(sizeof(char)*66); sprintf(vec, "/smallfiles/public/no_backup/bm/pdb_all_vec_6mer_struct/%5s.vec\0", input->url+56); bin = malloc(sizeof(char)*54); sprintf(bin, "/smallfiles/public/no_backup/bm/pdb_all_bin/%5s.bin\0", input->url+56); queryurl = initStringset(space); addString(space, queryurl, bin, strlen(bin)); addString(space, queryurl, vec, strlen(vec)); getimbissblast(space, input, sequences, noofseqs, alphabet, imbiss); imbiss->query = queryurl; imbiss->substrlen = substrlen; imbiss->alphabet = alphabet; /*if a substition file was given ...*/ if (subfile) { imbiss->sub = createsubmatrix(scores, imbiss->score, nooffreqs-1); } /*match 'n' report*/ time (&startsuf); imbiss->consensus = ALLOCMEMORY(space, NULL, Uint, (input->length-substrlen)); memset(imbiss->consensus, 0, (sizeof(Uint)*(input->length-substrlen))); rankSufmatch(space, arr, matches, input->length-substrlen, maxmatches, substrlen, sequences, noofseqs, filter, select, handler, input, imbiss, scores, depictsw); if (gnuplot) { consensus (space, imbiss->consensus, input->length-substrlen, input, substrlen, imbiss); } time (&endsuf); difrank = difftime (endsuf, startsuf); printf ("Building the suffixtree has taken %f seconds.\n", difsuf); printf ("Match the suffixtree has taken %f seconds.\n", difmatch); printf ("Rank the suffixtree has taken %f seconds.\n", difrank); /*partial cleanup*/ //destructStringset(space, queryurl); destructSequence(space, input); if(subfile) { 
FREEMEMORY(space, imbiss->sub); } FREEMEMORY(space, imbiss->consensus); FREEMEMORY(space, imbiss->score); FREEMEMORY(space, matches); free(bin); free(vec); } /*final cleanup*/ for (i=0; i < noofseqs; i++) { destructSequence(space, sequences[i]); } FREEMEMORY(space, sequences); destructSufArr(space, arr); #ifdef MEMMAN_H activeblocks(space); #endif printf("Goodbye.\n"); return EXIT_SUCCESS; }
/*
 * quickSortMultikey
 *
 * Sorts the indices 0..size-1 by repeatedly three-way partitioning a range
 * around a randomly chosen pivot. Elements are compared through cmp on the
 * key at position depth; the range of elements equal to the pivot is sorted
 * again one key position deeper. Pending ranges are kept on an explicit
 * stack of (offset, length, depth) triples instead of recursing.
 *
 * @param space    memory management handle passed to ALLOCMEMORY
 * @param toSort   opaque data handed through to cmp
 * @param size     number of elements to sort
 * @param cmp      comparator called as cmp(elem, pivot, depth, toSort, info);
 *                 0 puts elem into the equal range, 2 keeps it in the left
 *                 ("lesser") range and 1 in the right ("greater") range
 * @param sentinel index whose key marks the end of an element: an equal
 *                 range whose first element compares equal (0) to it at the
 *                 current depth is not sorted any deeper
 * @param info     opaque data handed through to cmp
 * @return newly allocated array of size sorted indices, NULL if size is 0
 */
Uint *
quickSortMultikey (void *space, void* toSort, Uint size,
    Uint (*cmp)(Uint, Uint, Uint, void *, void*),
    Uint sentinel, void *info)
{
  Sint a, b, c, d, v, n, r;
  TripleSint ins;
  Uint *sorted = NULL, offset;
  Uint depth = 0;
  VStack vstack;

  if (size == 0) return NULL;

  sorted = ALLOCMEMORY(space, NULL, Uint, size);
  if (size<=1) {
    sorted[0]=0;
  }

  /* start from the identity permutation */
  for (r=0; r < size; r++) sorted[r]=r;

  bl_vstackInit(&vstack, 100, sizeof(TripleSint));
  n = size;
  offset=0;

  while (1) {
    /* move a random pivot to the front of the current range */
    a = rand() % n;
    SWAPUINT(sorted, offset, a+offset);
    v = sorted[offset];
    a = b = 1;
    c = d = n-1;

    /*
     * scan from both ends; elements equal to the pivot are parked at the
     * outer ends [0,a) and (d,n-1] of the range
     */
    for(;;) {
      while(b<=c&&((r=cmp(sorted[b+offset],v,depth,toSort,info))==2||r==0))
      {
        if (r==0) {
          SWAPUINT(sorted, a+offset, b+offset);
          a++;
        }
        b++;
      }
      while(b<=c&&((r=cmp(sorted[c+offset],v,depth,toSort,info))==1||r==0))
      {
        if (r==0) {
          SWAPUINT(sorted, c+offset, d+offset);
          d--;
        }
        c--;
      }
      if (b > c) break;
      SWAPUINT(sorted, b+offset, c+offset);
      b++;
      c--;
    }

    /* swap the parked equal elements into the middle of the range */
    r = MIN(a, (b-a));
    vecswap(offset, (b-r)+offset, r, sorted);
    r = MIN((d-c), (n-d-1));
    vecswap(b+offset, (n-r)+offset, r, sorted);

    /*sort lesser*/
    r = b-a;
    if (r > 1) {
      ins.a = offset;
      ins.b = r;
      ins.c = depth;
      bl_vstackPush(&vstack, &ins);
    }

    /*sort equal*/
    if ((a+n-d-1) > 1
        && cmp(sorted[r+offset], sentinel, depth, toSort, info) != 0)
    {
      ins.a = r+offset;
      ins.b = (a+n-d-1);
      ins.c = depth+1;
      bl_vstackPush(&vstack, &ins);
    }

    /*sort greater*/
    r=d-c;
    if (r > 1) {
      ins.a = (n-r)+offset;
      ins.b = r;
      ins.c = depth;
      bl_vstackPush(&vstack, &ins);
    }

    if (!bl_vstackIsEmpty(&vstack)){
      /*
       * NOTE(review): if bl_vstackPop hands out a heap copy of the element
       * the copy is never released here -- confirm against its contract.
       */
      ins = *((TripleSint *) bl_vstackPop(&vstack, NULL));
      offset = ins.a;
      /*
       * the popped length has to become the size of the next range;
       * storing it in the scratch variable r left n at the full input
       * size for every sub-range
       */
      n = ins.b;
      depth = ins.c;
    } else {
      break;
    }
  }

  bl_vstackDestruct(&vstack, NULL);
  return sorted;
}
/*
 * quickSortMultikey
 *
 * Stack based variant: sorts the indices 0..size-1 by three-way
 * partitioning around a random pivot on the key at position depth. Ranges
 * that still have to be sorted are kept on a Stack as three consecutive
 * values (offset, length, depth).
 *
 * @param space    memory management handle for allocation and the stack
 * @param toSort   opaque data handed through to cmp
 * @param size     number of elements to sort
 * @param cmp      comparator called as cmp(elem, pivot, depth, toSort, info);
 *                 0 means equal, 2 keeps elem in the left ("lesser") range,
 *                 1 keeps it in the right ("greater") range
 * @param sentinel an equal range whose first element compares equal (0) to
 *                 this index at the current depth is not sorted any deeper
 * @param info     opaque data handed through to cmp
 * @return newly allocated array of size sorted indices, NULL if size is 0
 */
Uint *
quickSortMultikey (void *space, void* toSort, Uint size,
    Uint (*cmp)(Uint, Uint, Uint, void *, void*),
    Uint sentinel, void *info)
{
  Sint eqlo, lo, hi, eqhi;        /* partition cursors               */
  Sint pivot, len, res, idx, pick, cnt;
  Sint nless, nequal, ngreater;   /* sizes of the three sub-ranges   */
  Uint *sorted = NULL;
  Uint offset = 0;
  Uint depth = 0;
  Stack stack;

  if (size == 0) {
    return NULL;
  }

  /* identity permutation as the starting point */
  sorted = ALLOCMEMORY(space, NULL, Uint, size);
  idx = 0;
  while (idx < size) {
    sorted[idx] = idx;
    idx++;
  }

  initStack(space, &stack, 100);
  len = size;

  for (;;) {
    /* a randomly picked pivot goes to the front of the range */
    pick = rand() % len;
    SWAPUINT(sorted, offset, pick+offset);
    pivot = sorted[offset];

    eqlo = 1;
    lo = 1;
    hi = len-1;
    eqhi = len-1;

    while (1) {
      /* left scan: stop at the first element of the greater range */
      while (lo <= hi) {
        res = cmp(sorted[lo+offset], pivot, depth, toSort, info);
        if (res == 0) {
          SWAPUINT(sorted, eqlo+offset, lo+offset);
          eqlo++;
        } else if (res != 2) {
          break;
        }
        lo++;
      }

      /* right scan: stop at the first element of the lesser range */
      while (lo <= hi) {
        res = cmp(sorted[hi+offset], pivot, depth, toSort, info);
        if (res == 0) {
          SWAPUINT(sorted, hi+offset, eqhi+offset);
          eqhi--;
        } else if (res != 1) {
          break;
        }
        hi--;
      }

      if (lo > hi) {
        break;
      }

      SWAPUINT(sorted, lo+offset, hi+offset);
      lo++;
      hi--;
    }

    /* bring the equal elements from both ends into the middle */
    cnt = MIN(eqlo, (lo-eqlo));
    vecswap(offset, (lo-cnt)+offset, cnt, sorted);
    cnt = MIN((eqhi-hi), (len-eqhi-1));
    vecswap(lo+offset, (len-cnt)+offset, cnt, sorted);

    /* lesser range, same depth */
    nless = lo-eqlo;
    if (nless > 1) {
      stackpush(space, &stack, offset);
      stackpush(space, &stack, nless);
      stackpush(space, &stack, depth);
    }

    /* equal range, one key position deeper */
    nequal = eqlo+len-eqhi-1;
    if (nequal > 1
        && cmp(sorted[nless+offset], sentinel, depth, toSort, info) != 0) {
      stackpush(space, &stack, nless+offset);
      stackpush(space, &stack, nequal);
      stackpush(space, &stack, depth+1);
    }

    /* greater range, same depth */
    ngreater = eqhi-hi;
    if (ngreater > 1) {
      stackpush(space, &stack, (len-ngreater)+offset);
      stackpush(space, &stack, ngreater);
      stackpush(space, &stack, depth);
    }

    if (stackisempty(&stack)) {
      break;
    }

    /* values come back in reverse push order */
    depth = stackpop(&stack);
    len = stackpop(&stack);
    offset = stackpop(&stack);
  }

  destructStack(space, &stack);
  return sorted;
}
/*
 * constructsuflinks
 *
 * Fills s->suflink_l and s->suflink_r. In a first pass all intervals
 * reachable from [0, numofsuffixes-1] through getChildintervals are
 * collected in one list per value returned by getlcpval. In a second pass,
 * for every interval of a level i >= 1 whose level i-1 list exists, the
 * interval returned by findslinkinterval is stored at the index given by
 * getfirstlindex for that interval.
 *
 * Both tables are allocated here with numofsuffixes entries and
 * zero-initialised; all temporary lists are released before returning.
 *
 * @param space memory management handle
 * @param s     suffix array; s->suflink_l and s->suflink_r are overwritten
 */
void
constructsuflinks(void *space, Suffixarray *s)
{
  Uint i, j, a, b, k, nooflists, lcp, pos;
  Stack istack;
  Stack jstack;
  List *children, **lists;
  PairUint **data, slinkinterval;

  /* one (lazily created) list per lcp value 0..maxlcp */
  nooflists = maxlcp(s) +1;
  lists = ALLOCMEMORY(space, NULL, List*, nooflists);
  memset(lists, 0, sizeof(List*)*nooflists);

  /* istack/jstack hold the left and right borders of pending intervals */
  initStack(space, &istack, 1000);
  initStack(space, &jstack, 1000);

  stackpush(space, &istack, 0);
  stackpush(space, &jstack, s->numofsuffixes-1);

  while(!stackisempty(&istack)) {
    i = stackpop(&istack);
    j = stackpop(&jstack);
    lcp = getlcpval(s, i, j);

    if (lists[lcp] == NULL) {
      lists[lcp] = initList(space, 10);
    }
    addinterval(space, lists[lcp], i, j);

    children = getChildintervals(space, s, i, j);
    data = (PairUint**) dataList(space, children);

    for(k=children->length; k > 0; k--) {
      a = data[k-1]->a;
      b = data[k-1]->b;
      FREEMEMORY(space, data[k-1]);

      /* intervals with a == b are not descended into */
      if(a != b) {
        stackpush(space, &istack, a);
        stackpush(space, &jstack, b);
      }
    }

    FREEMEMORY(space, data);
    wrapList(space, children, NULL);
  }

  destructStack(space, &istack);
  destructStack(space, &jstack);

  s->suflink_l = ALLOCMEMORY(space, NULL, Uint, s->numofsuffixes);
  s->suflink_r = ALLOCMEMORY(space, NULL, Uint, s->numofsuffixes);
  memset(s->suflink_l, 0, sizeof(Uint)*s->numofsuffixes);
  memset(s->suflink_r, 0, sizeof(Uint)*s->numofsuffixes);

  for(i=1; i < nooflists; i++) {
    if(lists[i] != NULL && lists[i-1] !=NULL) {
      for(j=0; j < lists[i]->length; j++) {
        slinkinterval = findslinkinterval(space, s, lists, i, j);
        pos = getfirstlindex(s,
            ((PairUint*)lists[i]->nodes[j].data)->a,
            ((PairUint*)lists[i]->nodes[j].data)->b);

        s->suflink_l[pos]=slinkinterval.a;
        s->suflink_r[pos]=slinkinterval.b;
      }
    }

    /*
     * level i-1 is released once level i has been processed; levels for
     * which no interval was collected have no list to release
     */
    if (lists[i-1] != NULL) {
      wrapList(space, lists[i-1], destructinterval);
    }
  }

  /* the loop above stops one level short: release the last list, too */
  if (lists[nooflists-1] != NULL) {
    wrapList(space, lists[nooflists-1], destructinterval);
  }

  FREEMEMORY(space, lists);
  return;
}