/*
  Check the four motif characters against the alphabet of encseq and, if
  they are acceptable, overwrite them in place with their alphabet codes.

  The motif is rejected when
    - one of the four characters maps to UNDEFCHAR in the symbol map, or
    - the complement-table test fails: the complement of the first left
      symbol must equal the double complement of the second right symbol,
      and likewise for second left / first right. For plain a/c/g/t this
      means the right half is the reverse complement of the left half.

  Returns 0 on success. On failure returns -1, sets err and leaves motif
  unmodified.
*/
int testmotifandencodemotif (Motif *motif,
                             const Encodedsequence *encseq,
                             GtError *err)
{
  const GtUchar *symbolmap = getencseqAlphabetsymbolmap(encseq);
  GtUchar complement[UCHAR_MAX+1];
  GtUchar code_firstleft, code_secondleft, code_firstright, code_secondright;
  unsigned int rawchars[4], idx;

  /* raw (unencoded) motif characters in the order they are validated */
  rawchars[0] = (unsigned int) motif->firstleft;
  rawchars[1] = (unsigned int) motif->secondleft;
  rawchars[2] = (unsigned int) motif->firstright;
  rawchars[3] = (unsigned int) motif->secondright;

  for (idx = 0; idx < 4U; idx++)
  {
    if (symbolmap[rawchars[idx]] == (GtUchar) UNDEFCHAR)
    {
      gt_error_set(err,"Illegal nucleotide character %c "
                       "as argument to option -motif", (int) rawchars[idx]);
      return -1;
    }
  }

  /* start with a table in which no symbol has a complement ... */
  for (idx = 0; idx < (unsigned int) UCHAR_MAX + 1U; idx++)
  {
    complement[idx] = (GtUchar) UNDEFCHAR;
  }
  /* ... and then enter the four complementary pairs */
  complement[symbolmap['a']] = symbolmap['t'];
  complement[symbolmap['c']] = symbolmap['g'];
  complement[symbolmap['g']] = symbolmap['c'];
  complement[symbolmap['t']] = symbolmap['a'];

  code_firstleft = symbolmap[rawchars[0]];
  code_secondleft = symbolmap[rawchars[1]];
  code_firstright = symbolmap[rawchars[2]];
  code_secondright = symbolmap[rawchars[3]];

  /* outer pair first, then inner pair; any mismatch rejects the motif */
  if (complement[code_firstleft] != complement[complement[code_secondright]]
      ||
      complement[code_secondleft] != complement[complement[code_firstright]])
  {
    gt_error_set(err, "Illegal motif, motif not palindromic");
    return -1;
  }

  /* store the encoded symbols back into the motif */
  motif->firstleft = code_firstleft;
  motif->secondleft = code_secondleft;
  motif->firstright = code_firstright;
  motif->secondright = code_secondright;
  return 0;
}
/*
  Consistency test over all positions of encseq in the given readmode.

  For every position the character obtained by random access
  (getencodedchar) is compared with the character obtained by sequential
  reading (sequentialgetencodedchar). If filenametab is not NULL and
  readmode is Forwardmode, the characters are additionally compared with
  those delivered by a sequence buffer opened on the original files, and
  the end of that buffer terminates the scan.

  Returns 0 if all comparisons succeed and the number of scanned positions
  equals the total length of encseq; otherwise returns -1 and sets err.
*/
static int testfullscan(const GtStrArray *filenametab,
                        const Encodedsequence *encseq,
                        Readmode readmode,
                        GtError *err)
{
  Seqpos pos = 0, totallength;
  GtUchar ccscan = 0, ccra, ccsr;
  GtSequenceBuffer *fb = NULL;
  int retval;
  bool haserr = false;
  /* NULL until the scan state is really allocated, so that the cleanup
     below never looks at an indeterminate pointer */
  Encodedsequencescanstate *esr = NULL;
  unsigned long long fullscanpbar = 0;

  gt_error_check(err);
  totallength = getencseqtotallength(encseq);
  gt_progressbar_start(&fullscanpbar,(unsigned long long) totallength);
  if (filenametab != NULL)
  {
    fb = gt_sequence_buffer_new_guess_type((GtStrArray*) filenametab, err);
    if (!fb)
    {
      haserr = true;
    }
    if (!haserr)
    {
      gt_sequence_buffer_set_symbolmap(fb, getencseqAlphabetsymbolmap(encseq));
    }
  }
  if (!haserr)
  {
    esr = newEncodedsequencescanstate();
    initEncodedsequencescanstate(esr,encseq,readmode,0);
    for (pos=0; /* Nothing */; pos++)
    {
      if (filenametab != NULL && readmode == Forwardmode)
      {
        /* the sequence buffer decides when the scan ends */
        retval = gt_sequence_buffer_next(fb,&ccscan,err);
        if (retval < 0)
        {
          haserr = true;
          break;
        }
        if (retval == 0)
        {
          break;
        }
      } else
      {
        if (pos >= totallength)
        {
          break;
        }
      }
      ccra = getencodedchar(encseq,pos,readmode); /* Random access */
      if (filenametab != NULL && readmode == Forwardmode)
      {
        if (ccscan != ccra)
        {
          gt_error_set(err,"access=%s, position=" FormatSeqpos
                            ": scan (readnextchar) = %u != "
                            "%u = random access",
                            encseqaccessname(encseq),
                            pos,
                            (unsigned int) ccscan,
                            (unsigned int) ccra);
          haserr = true;
          break;
        }
      }
      ccsr = sequentialgetencodedchar(encseq,esr,pos,readmode);
      if (ccra != ccsr)
      {
        gt_error_set(err,"access=%s, mode=%s: position=" FormatSeqpos
                          ": random access = %u != %u = sequential read",
                          encseqaccessname(encseq),
                          showreadmode(readmode),
                          pos,
                          (unsigned int) ccra,
                          (unsigned int) ccsr);
        haserr = true;
        break;
      }
      fullscanpbar++;
    }
  }
  /* the progress bar was started unconditionally above and refers to the
     local counter fullscanpbar, so it is stopped on every path before
     this function returns */
  gt_progressbar_stop();
  if (!haserr)
  {
    if (pos != totallength)
    {
      gt_error_set(err,"sequence length must be " FormatSeqpos " but is "
                       FormatSeqpos,totallength,pos);
      haserr = true;
    }
  }
  if (esr != NULL)
  {
    freeEncodedsequencescanstate(&esr);
  }
  gt_sequence_buffer_delete(fb);
  return haserr ? -1 : 0;
}
/*
  Run local alignment searches of all query sequences against an index.

  Depending on the options the search is performed
    - online (doonline): Smith-Waterman via multiapplysmithwaterman on the
      mapped encoded sequence,
    - index based (!doonline): via indexbasedlocali on a generic index,
    - or both (docompare): matches of both methods are stored and compared
      per query with checkandresetstorematch.

  Returns 0 on success. Returns -1 with err set if the index, the encoded
  sequence or the query iterator cannot be created, or if reading a query
  fails. All resources created here are released on every path.
*/
int runidxlocali(const IdxlocaliOptions *idxlocalioptions,GtError *err)
{
  Genericindex *genericindex = NULL;
  bool haserr = false;
  Verboseinfo *verboseinfo;
  const Encodedsequence *encseq = NULL;

  verboseinfo = newverboseinfo(idxlocalioptions->verbose);
  if (idxlocalioptions->doonline)
  {
    encseq = mapencodedsequence (true,
                                 idxlocalioptions->indexname,
                                 true,
                                 false,
                                 false,
                                 true,
                                 verboseinfo,
                                 err);
    if (encseq == NULL)
    {
      haserr = true;
    }
  } else
  {
    genericindex = genericindex_new(idxlocalioptions->indexname,
                                    idxlocalioptions->withesa,
                                    idxlocalioptions->withesa ||
                                    idxlocalioptions->docompare,
                                    false,
                                    true,
                                    0,
                                    verboseinfo,
                                    err);
    if (genericindex == NULL)
    {
      haserr = true;
    } else
    {
      encseq = genericindex_getencseq(genericindex);
    }
  }
  if (!haserr)
  {
    GtSeqIterator *seqit;
    const GtUchar *query;
    unsigned long querylen;
    char *desc = NULL;
    int retval;
    Limdfsresources *limdfsresources = NULL;
    const AbstractDfstransformer *dfst;
    SWdpresource *swdpresource = NULL;
    Showmatchinfo showmatchinfo;
    Processmatch processmatch;
    void *processmatchinfoonline, *processmatchinfooffline;
    Storematchinfo storeonline, storeoffline;

    if (idxlocalioptions->docompare)
    {
      /* collect the matches of both methods separately for comparison */
      processmatch = storematch;
      initstorematch(&storeonline,encseq);
      initstorematch(&storeoffline,encseq);
      processmatchinfoonline = &storeonline;
      processmatchinfooffline = &storeoffline;
    } else
    {
      /* matches are directly passed to showmatch */
      processmatch = showmatch;
      showmatchinfo.encseq = encseq;
      showmatchinfo.characters = getencseqAlphabetcharacters(encseq);
      showmatchinfo.wildcardshow = getencseqAlphabetwildcardshow(encseq);
      showmatchinfo.showalignment = idxlocalioptions->showalignment;
      processmatchinfoonline = processmatchinfooffline = &showmatchinfo;
    }
    if (idxlocalioptions->doonline || idxlocalioptions->docompare)
    {
      swdpresource = newSWdpresource(idxlocalioptions->matchscore,
                                     idxlocalioptions->mismatchscore,
                                     idxlocalioptions->gapextend,
                                     idxlocalioptions->threshold,
                                     idxlocalioptions->showalignment,
                                     processmatch,
                                     processmatchinfoonline);
    }
    dfst = locali_AbstractDfstransformer();
    if (!idxlocalioptions->doonline || idxlocalioptions->docompare)
    {
      /* NOTE(review): with doonline and docompare both set no generic index
         has been loaded above and this assertion fails; presumably the
         option parser excludes that combination - confirm */
      gt_assert(genericindex != NULL);
      limdfsresources = newLimdfsresources(genericindex,
                                           true,
                                           0,
                                           0,    /* maxpathlength */
                                           true, /* keepexpandedonstack */
                                           processmatch,
                                           processmatchinfooffline,
                                           NULL, /* processresult */
                                           NULL, /* processresult info */
                                           dfst);
    }
    seqit = gt_seqiterator_new(idxlocalioptions->queryfiles, err);
    if (seqit == NULL)
    {
      haserr = true;
    } else
    {
      gt_seqiterator_set_symbolmap(seqit, getencseqAlphabetsymbolmap(encseq));
      /* queryunit serves as running number of the current query */
      for (showmatchinfo.queryunit = 0; /* Nothing */;
           showmatchinfo.queryunit++)
      {
        retval = gt_seqiterator_next(seqit,
                                     &query,
                                     &querylen,
                                     &desc,
                                     err);
        if (retval < 0)
        {
          haserr = true;
          break;
        }
        if (retval == 0)
        {
          break;
        }
        printf("process sequence " Formatuint64_t " of length %lu\n",
                PRINTuint64_tcast(showmatchinfo.queryunit),querylen);
        if (idxlocalioptions->doonline || idxlocalioptions->docompare)
        {
          multiapplysmithwaterman(swdpresource,encseq,query,querylen);
        }
        if (!idxlocalioptions->doonline || idxlocalioptions->docompare)
        {
          indexbasedlocali(limdfsresources,
                           idxlocalioptions->matchscore,
                           idxlocalioptions->mismatchscore,
                           idxlocalioptions->gapstart,
                           idxlocalioptions->gapextend,
                           idxlocalioptions->threshold,
                           query,
                           querylen,
                           dfst);
        }
        if (idxlocalioptions->docompare)
        {
          checkandresetstorematch(showmatchinfo.queryunit,
                                  &storeonline,&storeoffline);
        }
        gt_free(desc);
      }
    }
    /* the search resources are created before the query iterator, so they
       are released here regardless of whether the iterator could be
       created */
    if (limdfsresources != NULL)
    {
      freeLimdfsresources(&limdfsresources,dfst);
    }
    if (swdpresource != NULL)
    {
      freeSWdpresource(swdpresource);
      swdpresource = NULL;
    }
    if (seqit != NULL)
    {
      gt_seqiterator_delete(seqit);
    }
    if (idxlocalioptions->docompare)
    {
      freestorematch(&storeonline);
      freestorematch(&storeoffline);
    }
  }
  if (genericindex != NULL)
  {
    genericindex_delete(genericindex);
  } else
  {
    /* encseq is NULL here if loading failed above; this is a regular error
       path reported through err and must not abort via an assertion */
    if (encseq != NULL)
    {
      encodedsequence_free((Encodedsequence **) &encseq);
    }
  }
  freeverboseinfo(&verboseinfo);
  return haserr ? -1 : 0;
}