/*
  Creates a wavelet tree over the symbols of <encseq> and returns it.

  The new tree stores the results of gt_encseq_ref(<encseq>) and of
  gt_alphabet_ref() on the alphabet of <encseq>. An uncompressed bit vector
  is allocated and filled by gt_wtree_encseq_fill_bits(); it is then handed
  to gt_compressed_bitsequence_new() and released again, so only the
  compressed representation is kept in the returned object.
*/
GtWtree* gt_wtree_encseq_new(GtEncseq *encseq)
{
  /* sample rate passed to the compressed bit sequence */
  const unsigned int samplerate = 32U;
  /* number of bits that fit into one GtBitsequence word */
  const size_t bits_per_word = sizeof (GtBitsequence) * CHAR_BIT;
  GtWtree *wtree = gt_wtree_create(gt_wtree_encseq_class());
  GtWtreeEncseq *wtree_encseq = gt_wtree_encseq_cast(wtree);

  wtree_encseq->encseq = gt_encseq_ref(encseq);
  wtree_encseq->alpha = gt_alphabet_ref(gt_encseq_alphabet(encseq));

  /* gt_alphabet_size() covers the encoded characters plus the wildcard;
     UNDEFCHAR and SEPARATOR need a code as well, hence the two extra
     symbols */
  wtree_encseq->alpha_size = gt_alphabet_size(wtree_encseq->alpha) + 2;
  wtree->members->num_of_symbols = (GtUword) wtree_encseq->alpha_size;

  /* number of tree levels: \lceil log_2(\sigma)\rceil */
  wtree_encseq->levels =
    gt_determinebitspervalue((GtUword) wtree_encseq->alpha_size);

  wtree_encseq->root_fo = gt_wtree_encseq_fill_offset_new();
  wtree_encseq->current_fo = wtree_encseq->root_fo;

  wtree->members->length = gt_encseq_total_length(encseq);

  /* every level holds one bit per symbol of the sequence */
  wtree_encseq->num_of_bits = wtree_encseq->levels * wtree->members->length;

  /* number of words needed for num_of_bits bits, rounded up */
  wtree_encseq->bits_size = wtree_encseq->num_of_bits / bits_per_word;
  if (wtree_encseq->num_of_bits % bits_per_word != 0)
  {
    wtree_encseq->bits_size++;
  }

  wtree_encseq->bits = gt_calloc((size_t) wtree_encseq->bits_size,
                                 sizeof (GtBitsequence));
  wtree_encseq->node_start = 0;
  gt_wtree_encseq_fill_bits(wtree_encseq);

  wtree_encseq->c_bits =
    gt_compressed_bitsequence_new(wtree_encseq->bits,
                                  samplerate,
                                  wtree_encseq->num_of_bits);

  /* the plain bit vector is only a temporary construction buffer */
  gt_free(wtree_encseq->bits);
  wtree_encseq->bits = NULL;

  return wtree;
}
/*
  Adds one new symbol to <alphabet>.

  Every character of the non-empty, NUL-terminated string <characters> is
  appended to the map domain and mapped to the next free code (the current
  value of alphabet->mapsize). The first character of <characters> is stored
  in alphabet->characters at that code. Afterwards mapsize is incremented and
  bitspersymbol is recomputed from the new mapsize.

  <alphabet> and <characters> must not be NULL and <characters> must not be
  empty (both are checked with gt_assert()).
*/
void gt_alphabet_add_mapping(GtAlphabet *alphabet, const char *characters)
{
  size_t i, num_of_characters;

  gt_assert(alphabet && characters);
  num_of_characters = strlen(characters);
  gt_assert(num_of_characters);

  /* append the new characters to the domain */
  alphabet->mapdomain = gt_realloc(alphabet->mapdomain,
                                   (size_t) alphabet->domainsize
                                   + num_of_characters);
  memcpy(alphabet->mapdomain + alphabet->domainsize, characters,
         num_of_characters);
  alphabet->domainsize += num_of_characters;

  /* NOTE(review): the array is sized by domainsize but indexed by mapsize;
     this presumably relies on mapsize < domainsize -- confirm */
  alphabet->characters = gt_realloc(alphabet->characters,
                                    (size_t) alphabet->domainsize);
  alphabet->characters[alphabet->mapsize] = (GtUchar) characters[0];

  /* Index through unsigned char: plain char may be signed, and a byte
     >= 0x80 would otherwise yield a negative index into symbolmap. */
  for (i = 0; i < num_of_characters; i++)
  {
    alphabet->symbolmap[(unsigned char) characters[i]]
      = (GtUchar) alphabet->mapsize;
  }

  alphabet->mapsize++;
  alphabet->bitspersymbol
    = gt_determinebitspervalue((unsigned long) alphabet->mapsize);
}
/*
  Initializes the symbol mapping of <alpha> from the alphabet definition
  given as <lines>. <mapfile> is only used in error messages and may be NULL.

  Format, as implemented below:
  - Every line is appended to alpha->alphadef (followed by a newline).
  - Leading non-empty lines starting with '#' are skipped (preamble).
  - In every other non-empty line the leading run of punctuation and
    alphanumeric characters is mapped to the same code (the current mapsize).
    A blank ends the run; the character after the blank is then used as the
    character to show for that code, otherwise the first character of the
    line is used.
  - The character to show of the line with the last index is stored in
    alpha->wildcardshow; all characters mapped to the last code are remapped
    to WILDCARD and counted in alpha->mappedwildcards.

  Returns 0 on success. On failure <err> is set and -1 is returned; this
  happens for an empty <lines>, a character that is mapped twice, or an
  illegal character.

  NOTE(review): LINE() is a macro defined outside this block; it is assumed
  to index into currentline -- confirm.
*/
static int read_symbolmap_from_lines(GtAlphabet *alpha,
                                     const char *mapfile,
                                     const GtStrArray *lines,
                                     GtError *err)
{
  char cc;
  unsigned int cnum, ccidx, allocateddomainsize = 0;
  unsigned long linecount, column, numoflines;
  bool blankfound, ignore, preamble = true, haserr = false;
  const char *currentline;
  GtUchar chartoshow;

  gt_error_check(err);
  alpha->alphadef = gt_str_new();
  alpha->domainsize = alpha->mapsize = alpha->mappedwildcards = 0;
  for (cnum=0; cnum<=(unsigned int) GT_MAXALPHABETCHARACTER; cnum++)
  {
    alpha->symbolmap[cnum] = (GtUchar) UNDEFCHAR;
  }
  alpha->mapdomain = NULL;
  numoflines = gt_str_array_size(lines);
  if (numoflines == 0)
  {
    /* numoflines-1 would wrap around and request a huge allocation */
    alpha->characters = NULL;
    if (mapfile != NULL)
    {
      gt_error_set(err, "mapfile %s does not contain any line", mapfile);
    } else
    {
      gt_error_set(err, "alphabet definition does not contain any line");
    }
    haserr = true;
  } else
  {
    /* the last line describes the wildcard, which has no entry here */
    alpha->characters = gt_malloc(sizeof (GtUchar) * (numoflines-1));
  }
  for (linecount = 0; linecount < numoflines; linecount++)
  {
    currentline = gt_str_array_get(lines,linecount);
    gt_str_append_cstr(alpha->alphadef, currentline);
    gt_str_append_char(alpha->alphadef, '\n');
    ignore = false;
    if (currentline != NULL && currentline[0] != '\0')
    {
      if (preamble)
      {
        if (LINE(0) == (GtUchar) '#')
        {
          ignore = true;
        } else
        {
          preamble = false;
        }
      }
      if (!ignore)
      {
        blankfound = false;
        for (column=0; LINE(column) != '\0'; column++)
        { /* for all chars in line */
          cc = LINE(column);
          /* <ctype.h> functions and the symbolmap index need a value in
             the range of unsigned char; plain char may be negative */
          ccidx = (unsigned int) (unsigned char) cc;
          if (ispunct((int) ccidx) || isalnum((int) ccidx))
          {
            if (alpha->symbolmap[ccidx] != (GtUchar) UNDEFCHAR)
            {
              gt_error_set(err,"cannot map symbol '%c' to %u: "
                               "it is already mapped to %u",
                               cc,
                               alpha->mapsize,
                               (unsigned int) alpha->symbolmap[ccidx]);
              haserr = true;
              break;
            }
            /* get same value */
            alpha->symbolmap[ccidx] = (GtUchar) alpha->mapsize;
            if (alpha->domainsize >= allocateddomainsize)
            {
              allocateddomainsize += 8;
              alpha->mapdomain = gt_realloc(alpha->mapdomain,
                                            sizeof (GtUchar)
                                            * allocateddomainsize);
            }
            gt_assert(alpha->mapdomain != NULL);
            alpha->mapdomain[alpha->domainsize++] = (GtUchar) cc;
          } else
          {
            if (cc == (GtUchar) ' ') /* first blank in line found */
            {
              blankfound = true;
              /*@innerbreak@*/ break;
            }
            if (mapfile != NULL)
            {
              gt_error_set(err,
                           "illegal character '%c' in line %lu of mapfile %s",
                           cc,linecount,mapfile);
            } else
            {
              gt_error_set(err,
                           "illegal character '%c' in line %lu of alphabet "
                           "definition",
                           cc,linecount);
            }
            haserr = true;
            break;
          }
        }
        if (haserr)
        {
          break;
        }
        if (blankfound)
        {
          if (isspace((int) (unsigned char) LINE(column+1)))
          {
            if (mapfile != NULL)
            {
              gt_error_set(err,"illegal character '%c' at the end of "
                               "line %lu in mapfile %s",
                               LINE(column+1),linecount,mapfile);
            } else
            {
              gt_error_set(err,"illegal character '%c' at the end of "
                               "line %lu of alphabet definition",
                               LINE(column+1),linecount);
            }
            haserr = true;
            break;
          }
          /* use next character to display character */
          chartoshow = (GtUchar) LINE(column+1);
        } else
        {
          /* use first character of line to display character */
          chartoshow = (GtUchar) LINE(0);
        }
        if (linecount == numoflines-1)
        {
          alpha->wildcardshow = chartoshow;
        } else
        {
          alpha->characters[alpha->mapsize] = chartoshow;
        }
        alpha->mapsize++;
      }
    }
  }
  if (!haserr)
  {
    /* everything mapped to the last code becomes a wildcard */
    for (cnum=0;cnum<=(unsigned int) GT_MAXALPHABETCHARACTER; cnum++)
    {
      if (alpha->symbolmap[cnum] == (GtUchar) (alpha->mapsize - 1))
      {
        alpha->symbolmap[cnum] = (GtUchar) WILDCARD;
        alpha->mappedwildcards++;
      }
    }
  }
  /* there are mapsize-1 characters plus wildcard plus separator.
     hence there are mapsize+1 symbols in the range 0..mapsize.
     that is, mapsize is the largest symbol and we obtain */
  alpha->bitspersymbol
    = gt_determinebitspervalue((unsigned long) alpha->mapsize);
  return haserr ? -1 : 0;
}