/*
 * kmismatch
 *
 * For every read in `reads`, run kmis() with parameter `k` on the read
 * sequence and on the sequence returned by charDNAcomplement(), convert
 * any hits to matches with branch2match() and hand them to reportMatch().
 *
 * space    - memory-manager handle forwarded to the allocation helpers
 * s        - suffix array that is searched
 * reads    - input reads (noofseqs entries in seqs[])
 * k        - forwarded unchanged to kmis() (presumably the mismatch
 *            budget - confirm against kmis())
 * counter  - if NULL, a progress bar is drawn; otherwise *counter is
 *            incremented once per read and the global mutex2 is passed
 *            to reportMatch()
 * rep_type - forwarded to reportMatch()
 * silent   - non-zero suppresses both the progress bar and the counter
 * dev      - output stream forwarded to reportMatch()
 */
void kmismatch(void *space, Suffixarray *s, fasta_t *reads, Uint k,
    Uint* counter, Uint rep_type, unsigned char silent, FILE *dev) {
  Uint i, curlen;
  char *buffer, *curseq;
  branch_t *V;
  Gmap map;
  Uint noofmatches = 0;
  gread_t read;
  Container C;
  pthread_mutex_t *mtx = NULL;

  if (counter == NULL) {
    initProgressBarVT();
  } else {
    mtx = &mutex2;
  }

  initGmap(&map, s->seq, 1);

  for (i = 0; i < reads->noofseqs; i++) {
    curseq = reads->seqs[i]->sequence;
    curlen = reads->seqs[i]->length;

    initRead(&read, reads->seqs[i]);
    setReads(&map, &read, 1);

    if (!silent) {
      if (mtx == NULL) {
        progressBarVT("reads matched.", reads->noofseqs, i, 25);
      } else {
        (*counter)++;
      }
    }

    /* search the read as given */
    noofmatches = 0;
    V = kmis(space, s, curseq, curlen, k, &noofmatches);

    /* NOTE(review): V is only released when hits were found; if kmis()
     * returns allocated memory for zero hits this leaks - confirm. */
    if (noofmatches) {
      bl_containerInit(&C, 100, sizeof(gmatch_t));
      branch2match(s, &C, V, noofmatches);
      setMatches(&read, (gmatch_t*)C.contspace, bl_containerSize(&C),
          PLUSSTRAND);
      reportMatch(dev, &map, rep_type, 0, mtx, curlen, curlen);
      bl_containerDestruct(&C, NULL);
      FREEMEMORY(space, V);
    }

    /* search the complement with a fresh read record */
    initRead(&read, reads->seqs[i]);
    setReads(&map, &read, 1);

    buffer = charDNAcomplement(space, curseq, curlen);

    /* start the second search from a clean count so that a leftover
     * value from the first search can never trigger reporting here */
    noofmatches = 0;
    V = kmis(space, s, buffer, curlen, k, &noofmatches);

    if (noofmatches) {
      bl_containerInit(&C, 100, sizeof(gmatch_t));
      branch2match(s, &C, V, noofmatches);
      setMatches(&read, (gmatch_t*)C.contspace, bl_containerSize(&C),
          MINUSSTRAND);
      reportMatch(dev, &map, rep_type, 0, mtx, curlen, curlen);
      bl_containerDestruct(&C, NULL);
      FREEMEMORY(space, V);
    }

    FREEMEMORY(space, buffer);
  }

  return;
}
int main(int argc, char** argv) { Uint noofvecs, i; Sint optindex, c; vector_t info; #ifdef MEMMAN_H Spacetable spacetab; #endif void *space = NULL; char *url = NULL; char *outpath = NULL; char *pveclistfile = NULL; char *alphabetfile = NULL; char *vecext="vec"; char *seqext="seq"; struct prob_vec *p_vec; IntSequence *sequence; FAlphabet *alphabet; stringset_t *tok; stringset_t **fn; #ifdef MEMMAN_H initmemoryblocks(&spacetab, 1000); space = &spacetab; #endif while(1) { c=getopt_long(argc, argv, "f:a:o:", long_options, &optindex); if (c==-1) break; switch(c) { case 'f': pveclistfile = optarg; break; case 'a': alphabetfile = optarg; break; case 'o': outpath = optarg; break; default: usage(argv[0]); exit (EXIT_FAILURE); } } if (pveclistfile==NULL || alphabetfile == NULL) { usage(argv[0]); exit (EXIT_FAILURE); } fn=readcsv(space, pveclistfile, ".", &noofvecs); alphabet = loadCSValphabet(space, alphabetfile); sortMapdomain(space, alphabet); for(i=0; i<noofvecs; i++) { INITVECTOR(&info); SETSTR(fn[i],0) = concatdelim(space, SETSTR(fn[i],0), vecext, SETSTRLEN(fn[i],0), 3, '.'); p_vec = prob_vec_read (SETSTR(fn[i],0)); if (p_vec->mship == NULL) prob_vec_expand(p_vec); sequence = encode_prob_vec(space, alphabet, p_vec, 0, 0, cantorchar, &info); sequence->info = (Uint*) info.elements; sequence->namelen= strlen(alphabetfile); COPYSTR(space, sequence->alphabetname, alphabetfile, strlen(alphabetfile)); /*this is a potential security risk*/ if (p_vec->compnd_len > 0) { sequence->descrlen = p_vec->compnd_len-1; COPYSTR(space, sequence->description, p_vec->compnd, p_vec->compnd_len-1); } else { sequence->descrlen = 14; COPYSTR(space, sequence->description, "descriptor n/a", 14); } sequence->urllen = SETSTRLEN(fn[i],0); COPYSTR(space, sequence->url, SETSTR(fn[i],0), SETSTRLEN(fn[i],0)); SETSTR(fn[i],0) = concatdelim(space, SETSTR(fn[i],0), seqext, SETSTRLEN(fn[i],0), 3, '.'); SETSTRLEN(fn[i],0) += 4; if (outpath) { tok = tokensToStringset(space, "/", SETSTR(fn[i],0), 
SETSTRLEN(fn[i],0)); COPYSTR(space, url, outpath, strlen(outpath)); url = concat(space, url, SETSTR(tok, tok->noofstrings-1), strlen(url), SETSTRLEN(tok, tok->noofstrings-1)); saveSequence(sequence, url); destructStringset(space, tok); FREEMEMORY(space, url); url = NULL; } else { saveSequence(sequence, SETSTR(fn[i],0)); } destructSequence (space, sequence); prob_vec_destroy (p_vec); destructStringset (space, fn[i]); progressBarVT("probability vectors converted", noofvecs-1, i, 25); } printf("\nexit.\n"); FREEMEMORY(space, fn); destructAlphabet(space, alphabet); return EXIT_SUCCESS; }
void matchkdseed( void *space, Suffixarray *s, fasta_t *reads, Uint minsize, char *outfile, Uint *counter, unsigned char silent, Uint s_ext, Uint p_mis, Uint Xoff, Uint k_p, Uint rep_type, Uint hitstrategy, Uint bedist, unsigned char showalignment, double maxevalue, int acc, Uint M, unsigned char matchingstat, FILE *dev, FILE *nomatchdev) { double H, K, lambda; char *buffer, *curseq; Uint k, curlen, dim, wordno; bitvector *D, *Mv; Gmap map; gread_t read; gmatch_t *mmatches=NULL, *pmatches=NULL; matchstem_t *V; int plusdiff, minusdiff, noofmatches=0; pthread_mutex_t *mtx=NULL; pthread_mutex_t *mtx2=NULL; Uint *enctab, i,j, nmmatch, npmatch; int bmscr, bpscr; unsigned char uninformative = 0, beststrand = 0, best = 0; if (hitstrategy == 2) { beststrand = 1; best = 1; } else if (hitstrategy == 1) { beststrand = 1; } /*build alignment matrix*/ enctab = encodetab(s->seq->map, s->seq->mapsize); dim = reads->maxlen + 2*((reads->maxlen-ceil((acc*reads->maxlen)/100))+4); wordno = reads->maxlen/BITVECTOR_WORDSIZE; // wordno += ((reads->maxlen & (BITVECTOR_WORDSIZE-1)) > 0) ? 
1 : 0; wordno++; D = ALLOCMEMORY(space, NULL, bitvector, 2*(dim+1)); Mv = &D[dim+1]; for(i=0; i <= dim; i++) { D[i] = initbitvector(space, wordno*BITVECTOR_WORDSIZE); Mv[i] = initbitvector(space, wordno*BITVECTOR_WORDSIZE); } if (counter == NULL) { initProgressBarVT(); } else { mtx = &mutex1; mtx2 = &mutex2; } karlinunitcostpp(space, &lambda, &H, &K); for (k=0; k < reads->noofseqs; k++) { plusdiff = 0; minusdiff = 0; noofmatches = 0; if (!silent) { if (counter == NULL) { progressBarVT("reads matched.", reads->noofseqs, k, 25); } else { (*counter)++; } } curseq = reads->seqs[k]->sequence; curlen = reads->seqs[k]->length; npmatch = 0; nmmatch = 0; if(curlen >= minsize) { initGmap(&map, s->seq, 1); initRead(&read, reads->seqs[k]); bpscr = 0; bmscr = 0; V=kdseeds(space, s, curseq, curlen, s_ext, p_mis, Xoff, k_p); #ifdef KDUNINFORMATIVE if(V[0].branches[0].r > V[0].branches[0].l && V[0].branches[0].r-V[0].branches[0].l > M) { uninformative = 1; plusdiff = V[0].branches[0].r - V[0].branches[0].l; } else #endif if(!matchingstat) { uninformative = 0; pmatches = alignkdmatches(space, s, V, reads->seqs[k], curseq, curlen, M, enctab, beststrand, bedist, lambda, H, K, maxevalue, acc, D, dim, &npmatch, &bpscr); if(npmatch > 0) { setMatches(&read, pmatches, npmatch, PLUSSTRAND); } } else { printf("#%d %s\n",curlen, reads->seqs[k]->description); dumpkdseeds(s, V, curlen, '+', M); } for(j=0; j < curlen; j++) { if (V[j].noofbranches > 0) { FREEMEMORY(space, V[j].branches); } } FREEMEMORY(space, V); /*search the complement*/ buffer = charDNAcomplement(space, curseq, curlen); V=kdseeds(space, s, buffer, curlen, s_ext, p_mis, Xoff, k_p); #ifdef KDUNINFORMATIVE if(uninformative && V[0].branches[0].r > V[0].branches[0].l && V[0].branches[0].r-V[0].branches[0].l > M) { minusdiff = V[0].branches[0].r - V[0].branches[0].l; uninformative = 1; } else #endif if(!matchingstat) { uninformative = 0; mmatches = alignkdmatches(space, s, V, reads->seqs[k], buffer, curlen, M, enctab, beststrand, 
bedist, lambda, H, K, maxevalue, acc, D, dim, &nmmatch, &bmscr); if(nmmatch > 0) { setMatches(&read, mmatches, nmmatch, MINUSSTRAND); } if (best) { bpscr = MIN(bmscr, bpscr); bmscr = bpscr; } else if (!beststrand) { bpscr = bmscr = curlen-ceil((acc*curlen)/100); } setReads(&map, &read, 1); reportMatch(dev, &map, rep_type, showalignment, mtx, bpscr, bmscr); FREEMEMORY(space, pmatches); FREEMEMORY(space, mmatches); pmatches = NULL; mmatches = NULL; } else { dumpkdseeds(s, V, curlen, '-', M); } for(j=0; j < curlen; j++) { if (V[j].noofbranches > 0) { FREEMEMORY(space, V[j].branches); } } FREEMEMORY(space, V); FREEMEMORY(space, buffer); } if(nomatchdev && nmmatch == 0 && npmatch == 0) { if (mtx2 != NULL) pthread_mutex_lock(mtx2); fprintf(nomatchdev, "%s\n%s\n", reads->seqs[k]->description, reads->seqs[k]->sequence); fflush(nomatchdev); if (mtx2 != NULL) pthread_mutex_unlock(mtx2); } } wrapBitmatrix(space, D, 2*(dim+1)); FREEMEMORY(space, D); FREEMEMORY(space, enctab); return; }