static int outputsortedstring2indexviafileptr(const GtEncseq *encseq, GtUword mersize, GtUchar *bytebuffer, GtUword sizeofbuffer, FILE *merindexfpout, FILE *countsfilefpout, GtUword position, GtUword countocc, GtArrayLargecount *largecounts, GtUword countoutputmers, GT_UNUSED GtError *err) { gt_encseq_sequence2bytecode(bytebuffer,encseq,position,mersize); gt_xfwrite(bytebuffer, sizeof (*bytebuffer), (size_t) sizeofbuffer, merindexfpout); if (countsfilefpout != NULL) { GtUchar smallcount; if (countocc <= MAXSMALLMERCOUNT) { smallcount = (GtUchar) countocc; } else { Largecount *lc; GT_GETNEXTFREEINARRAY(lc,largecounts,Largecount,32); lc->idx = countoutputmers; lc->value = countocc; smallcount = 0; } gt_xfwrite(&smallcount, sizeof (smallcount),(size_t) 1,countsfilefpout); } return 0; }
/*
  Append one match to <wlismatches>.

  The new item stores <s_start>/<s_end> at index 0 and <q_start>/<q_end> at
  index 1 of its start/end position arrays. If <store_querymatch> is true,
  the item's own index in the item array is stored in oi_di.original_index;
  otherwise <distance> is stored in oi_di.distance.

  The weight of the item is the square of
  (aligned_len - 2 * distance) / aligned_len, where aligned_len is the value
  returned by gt_wlis_filter_aligned_len() for the new item. Note that
  <distance> enters the weight irrespective of <store_querymatch>.
*/
void gt_wlis_filter_matches_add(GtWLisFilterMatches *wlismatches,
                                GtUword s_start, GtUword s_end,
                                GtUword q_start, GtUword q_end,
                                GtUword distance,
                                bool store_querymatch)
{
  GtWlisItem *item;
  GtUword alignedlength;
  float identity;

  gt_assert(wlismatches != NULL);
  /* Grow the item array by 20% of its current allocation plus 256 entries
     whenever it is full. */
  GT_GETNEXTFREEINARRAY(item, &wlismatches->items, GtWlisItem,
                        wlismatches->items.allocatedGtWlisItem * 0.2 + 256);

  item->startpos[0] = s_start;
  item->endpos[0] = s_end;
  item->startpos[1] = q_start;
  item->endpos[1] = q_end;

  if (!store_querymatch)
  {
    item->oi_di.distance = distance;
  } else
  {
    /* nextfree was already advanced, so the new item sits at nextfree - 1 */
    gt_assert(wlismatches->items.nextfreeGtWlisItem > 0);
    item->oi_di.original_index = wlismatches->items.nextfreeGtWlisItem - 1;
  }

  alignedlength = gt_wlis_filter_aligned_len(item);
  identity = (float) (alignedlength - 2 * distance) / alignedlength;
  item->weight = identity * identity;
}
/*
  Create a GtBareEncseq for the already encoded <sequence> of length <len>
  over an alphabet with <numofchars> non-special symbols.

  A single pass over <sequence> counts the occurrences of each non-special
  symbol in charcount, counts the special symbols, and records every maximal
  run of special symbols as a (start, length) entry in specialranges.

  The returned object refers to <sequence> directly; no copy is made here.
*/
GtBareEncseq *gt_bare_encseq_new(GtUchar *sequence,GtUword len,
                                 GtUword numofchars)
{
  GtUword idx,
          openrunlength = 0; /* length of the special run being scanned */
  GtBareSpecialrange *openrun = NULL;
  GtBareEncseq *bare_encseq = gt_malloc(sizeof *bare_encseq);

  bare_encseq->sequence = sequence;
  bare_encseq->totallength = len;
  bare_encseq->specialcharacters = 0;
  bare_encseq->numofchars = numofchars;
  bare_encseq->charcount = gt_calloc((size_t) bare_encseq->numofchars,
                                     sizeof *bare_encseq->charcount);
  GT_INITARRAY(&bare_encseq->specialranges,GtBareSpecialrange);

  for (idx = 0; idx < len; idx++)
  {
    const GtUchar cc = sequence[idx];

    if (!ISSPECIAL(cc))
    {
      gt_assert((GtUword) cc < bare_encseq->numofchars);
      bare_encseq->charcount[(int) cc]++;
      /* A non-special symbol terminates the current special run, if any. */
      if (openrunlength > 0)
      {
        gt_assert(openrun != NULL);
        openrun->length = openrunlength;
        openrunlength = 0;
      }
      continue;
    }
    if (openrunlength == 0)
    {
      /* first special symbol of a new run */
      GT_GETNEXTFREEINARRAY(openrun,&bare_encseq->specialranges,
                            GtBareSpecialrange,128UL);
      openrun->start = idx;
    }
    openrunlength++;
    bare_encseq->specialcharacters++;
  }

  /* The sequence may end inside a special run which still has to be closed. */
  if (openrunlength > 0)
  {
    gt_assert(openrun != NULL);
    openrun->length = openrunlength;
  }
  return bare_encseq;
}
/*
  Parse the FASTA-like text in <filecontents> (<numofbytes> bytes) and build
  a GtBareEncseq from it.

  The buffer is transformed in place: lines starting with '>' are dropped,
  whitespace is skipped and every remaining character is replaced by its
  code from the symbol map of <alphabet>. Every '>' line except the first
  one encountered contributes a single SEPARATOR symbol. The encoded
  sequence therefore occupies a prefix of <filecontents>, to which the
  returned object refers.

  While encoding, the occurrences of each non-special symbol are counted in
  charcount, the special symbols (including separators) are counted, and
  each maximal run of special symbols is recorded as a (start, length) entry
  in specialranges.

  If a character maps to UNDEFCHAR, <err> is set, the partially built object
  is passed to gt_bare_encseq_delete() and NULL is returned. In that case
  <filecontents> may already have been partially overwritten.
*/
GtBareEncseq *gt_bare_encseq_parse_new(GtUchar *filecontents,size_t numofbytes,
                                       const GtAlphabet *alphabet,
                                       GtError *err)
{
  GtUchar *writeptr = filecontents, *readptr = filecontents;
  const GtUchar *endptr = filecontents + numofbytes;
  bool firstline = true, haserr = false;
  GtUword lastspecialrange_length = 0;
  GtBareSpecialrange *srptr = NULL;
  GtBareEncseq *bare_encseq = gt_malloc(sizeof *bare_encseq);
  const GtUchar *smap = gt_alphabet_symbolmap(alphabet);

  bare_encseq->specialcharacters = 0;
  bare_encseq->numofchars = (GtUword) gt_alphabet_num_of_chars(alphabet);
  bare_encseq->charcount = gt_calloc((size_t) bare_encseq->numofchars,
                                     sizeof *bare_encseq->charcount);
  GT_INITARRAY(&bare_encseq->specialranges,GtBareSpecialrange);
  while (!haserr && readptr < endptr)
  {
    if (*readptr == '>')
    {
      if (!firstline)
      {
        /* A header after the first one separates two sequences: emit a
           SEPARATOR, which extends or opens a range of special symbols. */
        if (lastspecialrange_length == 0)
        {
          GT_GETNEXTFREEINARRAY(srptr,&bare_encseq->specialranges,
                                GtBareSpecialrange,128UL);
          srptr->start = (GtUword) (writeptr - filecontents);
        }
        lastspecialrange_length++;
        *writeptr++ = SEPARATOR;
        bare_encseq->specialcharacters++;
      } else
      {
        firstline = false;
      }
      /* skip the remainder of the header line */
      while (readptr < endptr && *readptr != '\n')
      {
        readptr++;
      }
    } else
    {
      /* encode one line of sequence data */
      while (readptr < endptr && *readptr != '\n')
      {
        if (!isspace(*readptr))
        {
          GtUchar cc = smap[*readptr];

          if (cc == UNDEFCHAR)
          {
            gt_error_set(err,"illegal input characters %c\n",*readptr);
            haserr = true;
            break;
          }
          if (ISSPECIAL(cc))
          {
            if (lastspecialrange_length == 0)
            {
              GT_GETNEXTFREEINARRAY(srptr,&bare_encseq->specialranges,
                                    GtBareSpecialrange,128UL);
              srptr->start = (GtUword) (writeptr - filecontents);
            }
            lastspecialrange_length++;
            bare_encseq->specialcharacters++;
          } else
          {
            gt_assert((GtUword) cc < bare_encseq->numofchars);
            bare_encseq->charcount[(int) cc]++;
            /* a non-special symbol closes the current special range */
            if (lastspecialrange_length > 0)
            {
              gt_assert(srptr != NULL);
              srptr->length = lastspecialrange_length;
            }
            lastspecialrange_length = 0;
          }
          /* writeptr never overtakes readptr, so unread input is intact */
          *writeptr++ = cc;
        }
        readptr++;
      }
    }
    /* Step over the terminating newline. If the input ends without one,
       readptr already equals endptr and must not be advanced any further:
       a pointer beyond one past the end of the buffer is undefined. */
    if (readptr < endptr)
    {
      readptr++;
    }
  }
  /* the input may end inside a range of special symbols */
  if (lastspecialrange_length > 0)
  {
    gt_assert(srptr != NULL);
    srptr->length = lastspecialrange_length;
  }
  bare_encseq->sequence = filecontents;
  bare_encseq->totallength = (GtUword) (writeptr - filecontents);
  if (haserr)
  {
    gt_bare_encseq_delete(bare_encseq);
    return NULL;
  }
  return bare_encseq;
}