/*
 * Construct an FM-index as described by <mkfmcallinfo> and save it under
 * the name stored in mkfmcallinfo->outfmindex.
 *
 * The level description string is first translated into the two log2
 * parameters; a string that cannot be translated is reported via <err>.
 * The (possibly only partially constructed) index is always released
 * before returning.
 *
 * Returns 0 on success and -1 if any step failed.
 */
static int runmkfmindex(Mkfmcallinfo *mkfmcallinfo,GtLogger *logger,
                        GtError *err)
{
  Fmindex fm;
  GtSpecialcharinfo specialcharinfo;
  unsigned int log2bsize, log2markdist;
  int retval = 0;
  /* index positions are stored unless explicitly switched off */
  const bool storeindexpos = !mkfmcallinfo->noindexpos;

  gt_error_check(err);

  /* put fm into a state that can safely be handed to the free function */
  GT_INITARRAY(&fm.specpos, GtPairBwtidx);
  fm.bfreq = NULL;
  fm.superbfreq = NULL;
  fm.tfreq = NULL;
  fm.markpostable = NULL;
  fm.boundarray = NULL;
  fm.suffixlength = 0;

  if (levedescl2levelnum(gt_str_get(mkfmcallinfo->leveldesc),
                         &log2bsize,
                         &log2markdist) != 0)
  {
    gt_error_set(err,"undefined level \"%s\"",
                 gt_str_get(mkfmcallinfo->leveldesc));
    retval = -1;
  }
  else if (gt_sufbwt2fmindex(&fm,
                             &specialcharinfo,
                             log2bsize,
                             log2markdist,
                             gt_str_get(mkfmcallinfo->outfmindex),
                             mkfmcallinfo->indexnametab,
                             storeindexpos,
                             logger,
                             err) != 0)
  {
    retval = -1;
  }
  else if (gt_saveFmindex(gt_str_get(mkfmcallinfo->outfmindex),
                          &fm,
                          &specialcharinfo,
                          storeindexpos,
                          err) < 0)
  {
    retval = -1;
  }

  freeconstructedfmindex(&fm);
  return retval;
}
/*
 * Wrap an already encoded <sequence> of <len> symbols in a new GtBareEncseq.
 *
 * The sequence is scanned once: every maximal run of special symbols is
 * recorded as a (start, length) entry in the specialranges table, special
 * symbols are counted, and for every other symbol the per-character counter
 * is incremented. The object stores the <sequence> pointer itself; no copy
 * is made.
 */
GtBareEncseq *gt_bare_encseq_new(GtUchar *sequence,GtUword len,
                                 GtUword numofchars)
{
  GtBareEncseq *bare_encseq = gt_malloc(sizeof *bare_encseq);
  GtBareSpecialrange *current_range = NULL;
  GtUword pos,
          run_length = 0; /* length of the special run currently open */

  bare_encseq->sequence = sequence;
  bare_encseq->totallength = len;
  bare_encseq->specialcharacters = 0;
  bare_encseq->numofchars = numofchars;
  bare_encseq->charcount = gt_calloc((size_t) bare_encseq->numofchars,
                                     sizeof *bare_encseq->charcount);
  GT_INITARRAY(&bare_encseq->specialranges,GtBareSpecialrange);

  for (pos = 0; pos < len; pos++)
  {
    const GtUchar cc = sequence[pos];

    if (!ISSPECIAL(cc))
    {
      gt_assert((GtUword) cc < bare_encseq->numofchars);
      bare_encseq->charcount[(int) cc]++;
      if (run_length > 0)
      {
        /* an ordinary symbol terminates the open special run */
        gt_assert(current_range != NULL);
        current_range->length = run_length;
        run_length = 0;
      }
      continue;
    }
    if (run_length == 0)
    {
      /* first special symbol of a new run: open a new range entry */
      GT_GETNEXTFREEINARRAY(current_range,&bare_encseq->specialranges,
                            GtBareSpecialrange,128UL);
      current_range->start = pos;
    }
    run_length++;
    bare_encseq->specialcharacters++;
  }
  if (run_length > 0)
  {
    /* the sequence ends inside a special run */
    gt_assert(current_range != NULL);
    current_range->length = run_length;
  }
  return bare_encseq;
}
/*
 * Create a sequence iterator that reads from the given sequence <buffer>.
 *
 * The iterator takes its own reference to <buffer> and attaches a freshly
 * created description buffer to it. All counters start at zero and the
 * iterator is configured to deliver sequences (withsequence == true).
 */
GtSeqIterator* gt_seq_iterator_sequence_buffer_new_with_buffer(
                                                     GtSequenceBuffer *buffer)
{
  GtSeqIterator *iterator =
    gt_seq_iterator_create(gt_seq_iterator_sequence_buffer_class());
  GtSeqIteratorSequenceBuffer *self =
    gt_seq_iterator_sequence_buffer_cast(iterator);

  GT_INITARRAY(&self->sequencebuffer, GtUchar);

  /* wire the description buffer into the referenced sequence buffer */
  self->descptr = gt_desc_buffer_new();
  self->fb = gt_sequence_buffer_ref(buffer);
  gt_sequence_buffer_set_desc_buffer(self->fb, self->descptr);

  /* initial iteration state */
  self->withsequence = true;
  self->exhausted = false;
  self->unitnum = 0;
  self->currentread = 0;
  self->maxread = 0;

  return iterator;
}
/*
 * Append <position> to the list of positions stored in <hash> under the
 * key <kmercode>. If the key has no list yet, an empty one is allocated
 * and registered in the hash first.
 *
 * NOTE(review): the third argument of GT_STOREINARRAY is presumably the
 * growth increment of the array; if so, an empty list starts with room for
 * 20 values and later grows by a tenth of its current capacity -- confirm
 * against the macro definition.
 */
static void gt_kmer_database_add_to_hash(GtHashmap *hash,
                                         GtCodetype kmercode,
                                         GtUword position)
{
  GtArrayGtUword *positions
    = (GtArrayGtUword *) gt_hashmap_get(hash, (void *) kmercode);

  if (positions == NULL)
  {
    positions = gt_malloc(sizeof (*positions));
    GT_INITARRAY(positions, GtUword);
    gt_hashmap_add(hash, (void *) kmercode, (void *) positions);
  }

  if (positions->allocatedGtUword != 0)
  {
    GT_STOREINARRAY(positions, GtUword, positions->allocatedGtUword * 0.1,
                    position);
  }
  else
  {
    GT_STOREINARRAY(positions, GtUword, (GtUword) 20, position);
  }
}
/*
 * Verify the k-mer codes of <encseq> for k = <prefixlength>.
 *
 * The codes are first collected from the original input files of <encseq>
 * (getfastastreamkmers) and then checked against the encoded sequence
 * (verifycodelists). Returns 0 if both steps succeed and -1 otherwise; the
 * failing step is expected to have set <err>.
 */
int gt_verifymappedstr(const GtEncseq *encseq,
                       unsigned int prefixlength,
                       GtError *err)
{
  GtArrayGtCodetype codeliststream;
  unsigned int numofchars;
  int retval = 0;

  gt_error_check(err);
  numofchars = gt_alphabet_num_of_chars(gt_encseq_alphabet(encseq));
  GT_INITARRAY(&codeliststream,GtCodetype);

  if (getfastastreamkmers(gt_encseq_filenames(encseq),
                          numofchars,
                          prefixlength,
                          gt_alphabet_symbolmap(gt_encseq_alphabet(encseq)),
                          false,
                          &codeliststream,
                          err) != 0)
  {
    retval = -1;
  }
  else if (verifycodelists(encseq,
                           prefixlength,
                           numofchars,
                           &codeliststream,
                           err) != 0)
  {
    retval = -1;
  }

  GT_FREEARRAY(&codeliststream,GtCodetype);
  return retval;
}
/*
 * Read a condenseq data structure from the files belonging to <indexname>.
 *
 * First the encoded sequence of unique parts is loaded, then the condenseq
 * file (<indexname> plus GT_CONDENSEQ_FILE_SUFFIX) is read. Afterwards every
 * unique entry gets an array holding the indices of all link entries that
 * refer to it.
 *
 * Returns the new object, or NULL on failure with <err> set by the failing
 * step. On success a short note about ID access is written to <logger>.
 *
 * The encseq loader and the opened file are released on every path,
 * including the failure paths.
 */
GtCondenseq *gt_condenseq_new_from_file(const char *indexname,
                                        GtLogger *logger, GtError *err)
{
  int had_err = 0;
  FILE *fp = NULL;
  GtEncseqLoader *esl;
  GtEncseq *unique_es;
  GtCondenseq *condenseq = NULL;

  /* load unique_es; the loader is no longer needed afterwards, no matter
     whether loading succeeded */
  esl = gt_encseq_loader_new();
  unique_es = gt_encseq_loader_load(esl, indexname, err);
  gt_encseq_loader_delete(esl);
  if (!unique_es)
    had_err = -1;

  if (!had_err) {
    condenseq = condenseq_new_empty(gt_encseq_alphabet(unique_es));
    condenseq->filename = gt_cstr_dup(indexname);
    /* from here on unique_es is reachable via condenseq and is handed to
       gt_condenseq_delete() together with it on failure */
    condenseq->unique_es = unique_es;
    fp = gt_fa_fopen_with_suffix(indexname, GT_CONDENSEQ_FILE_SUFFIX,
                                 "rb", err);
    if (fp == NULL)
      had_err = -1;
  }

  if (!had_err) {
    had_err = condenseq_io(condenseq, fp, gt_io_error_fread, err);
    /* close the file whether or not reading worked */
    gt_fa_fclose(fp);
  }

  if (!had_err) {
    GtUword i;
    gt_assert(condenseq->uniques);
    gt_assert(condenseq->links);
    /*create link array for each unique entry*/
    for (i = 0; i < condenseq->udb_nelems; i++) {
      GT_INITARRAY(&(condenseq->uniques[i].links),uint32_t);
    }
    /* check for overflows: link indices are stored as uint32_t */
    if (condenseq->ldb_nelems > (GtUword) ((uint32_t) 0 - (uint32_t) 1)) {
      gt_error_set(err, "Overflow, to many link-elements. Can't be stored");
      had_err = -1;
    }
    /* iterate through link entries and store ids in corresponding unique
       entry array */
    for (i = 0; !had_err && (GtUword) i < condenseq->ldb_nelems; i++) {
      GtUword uid = condenseq->links[i].unique_id;
      gt_assert(uid < condenseq->udb_nelems);
      GT_STOREINARRAY(&(condenseq->uniques[uid].links),
                      uint32_t,
                      10,
                      (uint32_t) i);
    }
  }

  if (!had_err) {
    gt_assert(condenseq != NULL);
    if (condenseq->id_len != GT_UNDEF_UWORD)
      gt_logger_log(logger, "IDs const len: " GT_WU, condenseq->id_len);
    else
      gt_logger_log(logger, "using sdstab to access IDs");
  }
  if (had_err) {
    gt_condenseq_delete(condenseq);
    condenseq = NULL;
  }
  return (condenseq);
}
/*
 * Allocate and initialise a GtOutlcpinfo object.
 *
 * indexname                  if not NULL, the lcp values go to two files
 *                            named <indexname> plus GT_LCPTABSUFFIX and
 *                            GT_LARGELCPTABSUFFIX, both opened for writing;
 *                            if NULL, no files are used and an optional
 *                            callback is stored instead
 * numofchars, prefixlength   used to create the turning wheel when
 *                            prefixlength > 0
 * withdistribution           if true, a distribution object is created
 * swallow_tail_lcpvalues     stored unchanged in the new object
 * final_process_bucket(_info) callback and its argument; only stored when
 *                            indexname is NULL and the callback is not NULL
 * err                        receives the message if opening a file fails
 *
 * Returns the new object, or NULL if one of the output files could not be
 * opened. In the failure case everything acquired so far (file handle,
 * lcp2file block, distribution object) is released again.
 */
GtOutlcpinfo *gt_Outlcpinfo_new(const char *indexname,
                                unsigned int numofchars,
                                unsigned int prefixlength,
                                bool withdistribution,
                                bool swallow_tail_lcpvalues,
                                GtFinalProcessBucket final_process_bucket,
                                void *final_process_bucket_info,
                                GtError *err)
{
  bool haserr = false;
  GtOutlcpinfo *outlcpinfo;

  outlcpinfo = gt_malloc(sizeof (*outlcpinfo));
  outlcpinfo->sizeofinfo = sizeof (*outlcpinfo);
  outlcpinfo->lcpsubtab.lcptabsum = 0.0;
  outlcpinfo->swallow_tail_lcpvalues = swallow_tail_lcpvalues;
  if (withdistribution)
  {
    outlcpinfo->lcpsubtab.distlcpvalues = gt_disc_distri_new();
  } else
  {
    outlcpinfo->lcpsubtab.distlcpvalues = NULL;
  }
  if (indexname == NULL)
  {
    outlcpinfo->lcpsubtab.lcp2file = NULL;
    if (final_process_bucket != NULL)
    {
      outlcpinfo->lcpsubtab.lcpprocess
        = gt_malloc(sizeof (*outlcpinfo->lcpsubtab.lcpprocess));
      outlcpinfo->lcpsubtab.lcpprocess->final_process_bucket
        = final_process_bucket;
      outlcpinfo->lcpsubtab.lcpprocess->final_process_bucket_info
        = final_process_bucket_info;
    } else
    {
      outlcpinfo->lcpsubtab.lcpprocess = NULL;
    }
  } else
  {
    outlcpinfo->lcpsubtab.lcpprocess = NULL;
    outlcpinfo->lcpsubtab.lcp2file
      = gt_malloc(sizeof (*outlcpinfo->lcpsubtab.lcp2file));
    outlcpinfo->sizeofinfo += sizeof (*outlcpinfo->lcpsubtab.lcp2file);
    outlcpinfo->lcpsubtab.lcp2file->countoutputlcpvalues = 0;
    outlcpinfo->lcpsubtab.lcp2file->maxbranchdepth = 0;
    outlcpinfo->lcpsubtab.lcp2file->totalnumoflargelcpvalues = 0;
    outlcpinfo->lcpsubtab.lcp2file->reservoir = NULL;
    outlcpinfo->lcpsubtab.lcp2file->sizereservoir = 0;
    outlcpinfo->lcpsubtab.lcp2file->smalllcpvalues = NULL;
    GT_INITARRAY(&outlcpinfo->lcpsubtab.lcp2file->largelcpvalues,
                 Largelcpvalue);
    /* keep the second handle defined even if the first open fails */
    outlcpinfo->lcpsubtab.lcp2file->outfpllvtab = NULL;
    outlcpinfo->lcpsubtab.lcp2file->outfplcptab
      = gt_fa_fopen_with_suffix(indexname,GT_LCPTABSUFFIX,"wb",err);
    if (outlcpinfo->lcpsubtab.lcp2file->outfplcptab == NULL)
    {
      haserr = true;
    }
    if (!haserr)
    {
      outlcpinfo->lcpsubtab.lcp2file->outfpllvtab
        = gt_fa_fopen_with_suffix(indexname,GT_LARGELCPTABSUFFIX,"wb",err);
      if (outlcpinfo->lcpsubtab.lcp2file->outfpllvtab == NULL)
      {
        haserr = true;
      }
    }
  }
  outlcpinfo->numsuffixes2output = 0;
  outlcpinfo->minchanged = 0;
  if (!haserr && prefixlength > 0)
  {
    outlcpinfo->turnwheel = gt_turningwheel_new(prefixlength,numofchars);
    outlcpinfo->sizeofinfo += gt_turningwheel_size();
  } else
  {
    outlcpinfo->turnwheel = NULL;
  }
#ifdef SKDEBUG
  outlcpinfo->previoussuffix.startpos = 0;
#endif
  outlcpinfo->previoussuffix.code = 0;
  outlcpinfo->previoussuffix.prefixindex = 0;
  outlcpinfo->previoussuffix.defined = false;
  outlcpinfo->previousbucketwasempty = false;
  outlcpinfo->lcpsubtab.tableoflcpvalues.bucketoflcpvalues = NULL;
  outlcpinfo->lcpsubtab.tableoflcpvalues.numofentries = 0;
#ifndef NDEBUG
  outlcpinfo->lcpsubtab.tableoflcpvalues.isset = NULL;
#endif
  if (haserr)
  {
    /* An error can only stem from one of the two file opens, so no turning
       wheel was created. Release whatever was acquired before the failure. */
    if (outlcpinfo->lcpsubtab.lcp2file != NULL)
    {
      if (outlcpinfo->lcpsubtab.lcp2file->outfplcptab != NULL)
      {
        gt_fa_fclose(outlcpinfo->lcpsubtab.lcp2file->outfplcptab);
      }
      if (outlcpinfo->lcpsubtab.lcp2file->outfpllvtab != NULL)
      {
        gt_fa_fclose(outlcpinfo->lcpsubtab.lcp2file->outfpllvtab);
      }
      gt_free(outlcpinfo->lcpsubtab.lcp2file);
    }
    if (outlcpinfo->lcpsubtab.distlcpvalues != NULL)
    {
      gt_disc_distri_delete(outlcpinfo->lcpsubtab.distlcpvalues);
    }
    gt_free(outlcpinfo->lcpsubtab.lcpprocess);
    gt_free(outlcpinfo);
    return NULL;
  }
  return outlcpinfo;
}
/*
 * Build a bucket table for the BWT sequence <voidbwtseq> down to depth
 * <maxdepth>.
 *
 * Starting with the interval [0, totallength+1) at depth 0, intervals are
 * taken from an explicit stack and split with
 * bwtrangesplitallwithoutspecial(). Every resulting child interval is
 * stored in the table. A child above <maxdepth> is either pushed for
 * further splitting (if it spans more than one position) or handed to
 * followleafedge(). Finally a short fill statistic is printed to stdout.
 */
Pckbuckettable *pckbuckettable_new(const void *voidbwtseq,
                                   unsigned int numofchars,
                                   Seqpos totallength,
                                   unsigned int maxdepth)
{
  GtArrayBoundsatdepth pending;
  Boundsatdepth current, next;
  unsigned long numofsplits, splitidx;
  Seqpos *rangeOccs;
  Mbtab *splits;
  Pckbuckettable *pckbt;

  /* seed the stack with the interval covering the whole sequence */
  GT_INITARRAY(&pending,Boundsatdepth);
  next.lowerbound = 0;
  next.upperbound = totallength+1;
  next.depth = 0;
  next.code = (Codetype) 0;
  GT_STOREINARRAY(&pending,Boundsatdepth,128,next);

  rangeOccs = gt_malloc(sizeof(*rangeOccs) * GT_MULT2(numofchars));
  splits = gt_malloc(sizeof(*splits) * numofchars);
  pckbt = allocandinitpckbuckettable(numofchars,maxdepth,true);

  while (pending.nextfreeBoundsatdepth != 0)
  {
    pending.nextfreeBoundsatdepth--;
    current = pending.spaceBoundsatdepth[pending.nextfreeBoundsatdepth];
    gt_assert(current.lowerbound < current.upperbound);
    numofsplits = bwtrangesplitallwithoutspecial(splits,
                                                 rangeOccs,
                                                 voidbwtseq,
                                                 current.lowerbound,
                                                 current.upperbound);
    gt_assert(numofsplits <= (unsigned long) numofchars);
    for (splitidx = 0; splitidx < numofsplits; splitidx++)
    {
      next.lowerbound = splits[splitidx].lowerbound;
      next.upperbound = splits[splitidx].upperbound;
      next.depth = current.depth + 1;
      gt_assert(next.depth <= maxdepth);
      next.code = current.code * numofchars + splitidx;
      storeBoundsatdepth(pckbt,&next);
      if (next.depth >= maxdepth)
      {
        continue; /* deepest level reached, nothing more to expand */
      }
      if (next.lowerbound + 1 < next.upperbound)
      {
        GT_STOREINARRAY(&pending,Boundsatdepth,128,next);
      } else
      {
        followleafedge(pckbt,voidbwtseq,&next);
      }
    }
  }
  GT_FREEARRAY(&pending,Boundsatdepth);
  gt_free(rangeOccs);
  gt_free(splits);
  printf("filled: %lu (%.2f)\n",pckbt->numofvalues,
                        (double) pckbt->numofvalues/pckbt->maxnumofvalues);
  return pckbt;
}
/*
 * Write all tables described by a map specification to <fp>.
 *
 * <setup> is called with <data> to fill the specification table. The
 * entries are then written one after another in table order; after each
 * entry gt_mapspec_pad() is called with the current byte offset and the
 * number of pad bytes it reports is added to the offset. At the end the
 * offset must equal <expectedsize> plus the total number of pad bytes.
 *
 * Returns 0 on success and -1 on failure with <err> set (unknown type
 * specifier, padding failure, or size mismatch).
 *
 * NOTE(review): WRITEACTIONWITHTYPE is defined outside this block and
 * presumably refers to the locals mapspecptr and fp -- do not rename them
 * without checking the macro.
 */
int gt_mapspec_write(GtMapspecSetupFunc setup, FILE *fp,
                     void *data, GtUword expectedsize, GtError *err)
{
  GtMapspecification *mapspecptr;
  GtUword byteoffset = 0;
  int had_err = 0;
  GtUword totalpadunits = 0;
  GtUword byteswritten = 0;
  GtMapspec *ms = gt_malloc(sizeof (GtMapspec));

  gt_error_check(err);
  GT_INITARRAY(&ms->mapspectable,GtMapspecification);
  setup(ms, data, true);
  gt_assert(ms->mapspectable.spaceGtMapspecification != NULL);
  for (mapspecptr = ms->mapspectable.spaceGtMapspecification;
       mapspecptr < ms->mapspectable.spaceGtMapspecification +
                    ms->mapspectable.nextfreeGtMapspecification;
       mapspecptr++)
  {
#ifdef SKDEBUG
    printf("# %s",__func__);
    showmapspec(mapspecptr);
    printf(" at byteoffset "GT_WU"\n",byteoffset);
#endif
    if (mapspecptr->numofunits > 0)
    {
      switch (mapspecptr->typespec)
      {
        case GtCharType:
          WRITEACTIONWITHTYPE(char);
          break;
        case GtFilelengthvaluesType:
          WRITEACTIONWITHTYPE(GtFilelengthvalues);
          break;
        case GtUcharType:
          WRITEACTIONWITHTYPE(GtUchar);
          break;
        case Uint16Type:
          WRITEACTIONWITHTYPE(uint16_t);
          break;
        case Uint32Type:
          WRITEACTIONWITHTYPE(uint32_t);
          break;
        case GtUlongType:
          WRITEACTIONWITHTYPE(GtUlong);
          break;
        case Uint64Type:
          WRITEACTIONWITHTYPE(uint64_t);
          break;
        case GtBitsequenceType:
          WRITEACTIONWITHTYPE(GtBitsequence);
          break;
        case GtUlongBoundType:
          WRITEACTIONWITHTYPE(GtUlongBound);
          break;
        case GtPairBwtidxType:
          WRITEACTIONWITHTYPE(GtPairBwtidx);
          break;
        case GtTwobitencodingType:
          WRITEACTIONWITHTYPE(GtTwobitencoding);
          break;
        case GtSpecialcharinfoType:
          WRITEACTIONWITHTYPE(GtSpecialcharinfo);
          break;
        case GtBitElemType:
          WRITEACTIONWITHTYPE(BitElem);
          break;
        case GtUintType:
          WRITEACTIONWITHTYPE(unsigned int);
          break;
        default:
          gt_error_set(err, "no map specification for size " GT_WU,
                       (GtUword) mapspecptr->sizeofunit);
          had_err = -1;
      }
    }
    if (had_err)
    {
      break;
    }
    byteoffset = CALLCASTFUNC(uint64_t,unsigned_long,
                              (uint64_t) (byteoffset +
                                          mapspecptr->sizeofunit *
                                          mapspecptr->numofunits));
    if (gt_mapspec_pad(fp,&byteswritten,byteoffset,err) != 0)
    {
      /* stop immediately: byteswritten may not have been set and no
         further table must be written after a failed padding step */
      had_err = -1;
      break;
    }
    byteoffset += byteswritten;
    totalpadunits += byteswritten;
  }
  if (!had_err)
  {
    if (expectedsize + totalpadunits != byteoffset)
    {
      gt_error_set(err, "expected file size is " GT_WU " bytes, "
                        "but file has " GT_WU " bytes",
                        expectedsize, byteoffset);
      had_err = -1;
    }
  }
  GT_FREEARRAY(&ms->mapspectable,GtMapspecification);
  gt_free(ms);
  return had_err;
}
/*
 * Map <filename> into memory and make the tables described by a map
 * specification point into the mapped region.
 *
 * <setup> is called with <data> to fill the specification table. The file
 * is mapped read-only and the address is stored in <*mapped> (NULL if the
 * mapping failed). The first table is placed at offset 0; each following
 * table is placed at the end of its predecessor, rounded up to a multiple
 * of GT_WORDSIZE_INBYTES. The file size is checked against the size
 * derived from the specification, and the final offset against
 * <expectedsize> plus the accumulated padding.
 *
 * Returns 0 on success and -1 on failure with <err> set.
 */
int gt_mapspec_read(GtMapspecSetupFunc setup, void *data,
                    const char *filename, GtUword expectedsize,
                    void **mapped, GtError *err)
{
  GtMapspec *ms = gt_malloc(sizeof (GtMapspec));
  void *mapptr;
  size_t numofbytes;
  GtUword byteoffset = 0,
          totalpadunits = 0;
  int had_err = 0;

  gt_error_check(err);
  GT_INITARRAY(&ms->mapspectable, GtMapspecification);
  setup(ms, data, false);

  mapptr = gt_fa_mmap_read(filename, &numofbytes, err);
  if (mapptr == NULL)
  {
    had_err = -1;
  }
  *mapped = mapptr;

  /* the first table always starts at the beginning of the mapping */
  if (!had_err &&
      assigncorrecttype(ms->mapspectable.spaceGtMapspecification,
                        mapptr,0,err) != 0)
  {
    had_err = -1;
  }

  if (!had_err)
  {
    const uint64_t expectedaccordingtomapspec
      = detexpectedaccordingtomapspec(&ms->mapspectable);

    if (expectedaccordingtomapspec != (uint64_t) numofbytes)
    {
      gt_error_set(err, GT_WU " bytes read from %s, but " Formatuint64_t
                        " expected",
                        (GtUword) numofbytes, filename,
                        PRINTuint64_tcast(expectedaccordingtomapspec));
      had_err = -1;
    }
  }

  if (!had_err)
  {
    GtMapspecification *table = ms->mapspectable.spaceGtMapspecification;
    GtUword idx;

    gt_assert(table != NULL);
    /* offset directly behind the first table, padded to a word boundary */
    byteoffset = CALLCASTFUNC(uint64_t,unsigned_long,
                              (uint64_t) (table->sizeofunit *
                                          table->numofunits));
    if (byteoffset % (GtUword) GT_WORDSIZE_INBYTES > 0)
    {
      size_t padunits
        = GT_WORDSIZE_INBYTES - (byteoffset % GT_WORDSIZE_INBYTES);
      byteoffset += (GtUword) padunits;
      totalpadunits += (GtUword) padunits;
    }
    /* place all remaining tables one after another */
    for (idx = 1; idx < ms->mapspectable.nextfreeGtMapspecification; idx++)
    {
      GtMapspecification *entry = table + idx;

      if (assigncorrecttype(entry,mapptr,byteoffset,err) != 0)
      {
        had_err = -1;
        break;
      }
      byteoffset = CALLCASTFUNC(uint64_t,unsigned_long,
                                (uint64_t) (byteoffset +
                                            entry->sizeofunit *
                                            entry->numofunits));
      if (byteoffset % (GtUword) GT_WORDSIZE_INBYTES > 0)
      {
        size_t padunits
          = GT_WORDSIZE_INBYTES - (byteoffset % GT_WORDSIZE_INBYTES);
        byteoffset += (GtUword) padunits;
        totalpadunits += (GtUword) padunits;
      }
    }
  }

  if (!had_err && expectedsize + totalpadunits != byteoffset)
  {
    gt_error_set(err,"mapping: expected file size is "GT_WU" bytes, "
                     "but file has "GT_WU" bytes",
                     expectedsize,byteoffset);
    had_err = -1;
  }

  GT_FREEARRAY(&ms->mapspectable,GtMapspecification);
  gt_free(ms);
  return had_err;
}
static int enumeratelcpintervals(const char *inputindex, Sequentialsuffixarrayreader *ssar, const char *storeindex, bool storecounts, GtUword mersize, GtUword minocc, GtUword maxocc, bool performtest, GtLogger *logger, GtError *err) { TyrDfsstate *state; bool haserr = false; unsigned int alphasize; gt_error_check(err); state = gt_malloc(sizeof (*state)); GT_INITARRAY(&state->occdistribution,Countwithpositions); state->esrspace = gt_encseq_create_reader_with_readmode( gt_encseqSequentialsuffixarrayreader(ssar), gt_readmodeSequentialsuffixarrayreader(ssar), 0); state->mersize = (GtUword) mersize; state->encseq = gt_encseqSequentialsuffixarrayreader(ssar); alphasize = gt_alphabet_num_of_chars(gt_encseq_alphabet(state->encseq)); state->readmode = gt_readmodeSequentialsuffixarrayreader(ssar); state->storecounts = storecounts; state->minocc = minocc; state->maxocc = maxocc; state->totallength = gt_encseq_total_length(state->encseq); state->performtest = performtest; state->countoutputmers = 0; state->merindexfpout = NULL; state->countsfilefpout = NULL; GT_INITARRAY(&state->largecounts,Largecount); if (strlen(storeindex) == 0) { state->sizeofbuffer = 0; state->bytebuffer = NULL; } else { state->sizeofbuffer = MERBYTES(mersize); state->bytebuffer = gt_malloc(sizeof *state->bytebuffer * state->sizeofbuffer); } if (performtest) { state->currentmer = gt_malloc(sizeof *state->currentmer * state->mersize); state->suftab = gt_suftabSequentialsuffixarrayreader(ssar); } else { state->currentmer = NULL; state->suftab = NULL; } if (state->mersize > state->totallength) { gt_error_set(err,"mersize "GT_WU" > "GT_WU" = totallength not allowed", state->mersize, state->totallength); haserr = true; } else { if (strlen(storeindex) == 0) { state->processoccurrencecount = adddistpos2distribution; } else { state->merindexfpout = gt_fa_fopen_with_suffix(storeindex,MERSUFFIX, "wb",err); if (state->merindexfpout == NULL) { haserr = true; } else { if (state->storecounts) { state->countsfilefpout = 
gt_fa_fopen_with_suffix(storeindex,COUNTSSUFFIX,"wb",err); if (state->countsfilefpout == NULL) { haserr = true; } } } state->processoccurrencecount = outputsortedstring2index; } if (!haserr) { if (gt_depthfirstesa(ssar, tyr_allocateDfsinfo, tyr_freeDfsinfo, tyr_processleafedge, NULL, tyr_processcompletenode, tyr_assignleftmostleaf, tyr_assignrightmostleaf, (Dfsstate*) state, logger, err) != 0) { haserr = true; } if (strlen(storeindex) == 0) { showfinalstatistics(state,inputindex,logger); } } if (!haserr) { if (state->countsfilefpout != NULL) { gt_logger_log(logger,"write "GT_WU" mercounts > "GT_WU " to file \"%s%s\"", state->largecounts.nextfreeLargecount, (GtUword) MAXSMALLMERCOUNT, storeindex, COUNTSSUFFIX); gt_xfwrite(state->largecounts.spaceLargecount, sizeof (Largecount), (size_t) state->largecounts.nextfreeLargecount, state->countsfilefpout); } } if (!haserr) { gt_logger_log(logger,"number of "GT_WU"-mers in index: "GT_WU"", mersize, state->countoutputmers); gt_logger_log(logger,"index size: %.2f megabytes\n", GT_MEGABYTES(state->countoutputmers * state->sizeofbuffer + sizeof (GtUword) * EXTRAINTEGERS)); } } /* now out EXTRAINTEGERS integer values */ if (!haserr && state->merindexfpout != NULL) { outputbytewiseUlongvalue(state->merindexfpout, (GtUword) state->mersize); outputbytewiseUlongvalue(state->merindexfpout,(GtUword) alphasize); } gt_fa_xfclose(state->merindexfpout); gt_fa_xfclose(state->countsfilefpout); GT_FREEARRAY(&state->occdistribution,Countwithpositions); gt_free(state->currentmer); gt_free(state->bytebuffer); GT_FREEARRAY(&state->largecounts,Largecount); gt_encseq_reader_delete(state->esrspace); gt_free(state); return haserr ? -1 : 0; }
/*
 * Parse the FASTA-like text in <filecontents> (<numofbytes> bytes) and
 * encode it in place using the symbol map of <alphabet>.
 *
 * Lines starting with '>' are header lines and are dropped; every header
 * except the first one contributes a single SEPARATOR symbol. In all other
 * lines whitespace is skipped and each remaining character is replaced by
 * its code. The encoded symbols are written back to the beginning of
 * <filecontents>, which becomes the sequence of the returned object (no
 * copy is made). Runs of special symbols are recorded as (start, length)
 * ranges, and per-character counts are maintained.
 *
 * Returns the new object, or NULL with <err> set if a character maps to
 * UNDEFCHAR.
 *
 * NOTE(review): if the input starts with sequence lines instead of a
 * header, the first header encountered later is still treated as the
 * "first" one and no SEPARATOR is inserted for it -- confirm that this is
 * intended.
 */
GtBareEncseq *gt_bare_encseq_parse_new(GtUchar *filecontents,size_t numofbytes,
                                       const GtAlphabet *alphabet,
                                       GtError *err)
{
  GtUchar *writeptr = filecontents, *readptr = filecontents;
  const GtUchar *endptr = filecontents + numofbytes;
  bool firstline = true, haserr = false;
  GtUword lastspecialrange_length = 0;
  GtBareSpecialrange *srptr = NULL;
  GtBareEncseq *bare_encseq = gt_malloc(sizeof *bare_encseq);
  const GtUchar *smap = gt_alphabet_symbolmap(alphabet);

  bare_encseq->specialcharacters = 0;
  bare_encseq->numofchars = (GtUword) gt_alphabet_num_of_chars(alphabet);
  bare_encseq->charcount = gt_calloc((size_t) bare_encseq->numofchars,
                                     sizeof *bare_encseq->charcount);
  GT_INITARRAY(&bare_encseq->specialranges,GtBareSpecialrange);
  while (!haserr && readptr < endptr)
  {
    if (*readptr == '>')
    {
      if (!firstline)
      {
        /* the separator either opens a new special range or extends the
           one that is still open from the end of the previous sequence */
        if (lastspecialrange_length == 0)
        {
          GT_GETNEXTFREEINARRAY(srptr,&bare_encseq->specialranges,
                                GtBareSpecialrange,128UL);
          srptr->start = (GtUword) (writeptr - filecontents);
        }
        lastspecialrange_length++;
        *writeptr++ = SEPARATOR;
        bare_encseq->specialcharacters++;
      } else
      {
        firstline = false;
      }
      /* skip the rest of the header line */
      while (readptr < endptr && *readptr != '\n')
      {
        readptr++;
      }
    } else
    {
      while (readptr < endptr && *readptr != '\n')
      {
        if (!isspace(*readptr))
        {
          GtUchar cc = smap[*readptr];
          if (cc == UNDEFCHAR)
          {
            gt_error_set(err,"illegal input characters %c\n",*readptr);
            haserr = true;
            break;
          }
          if (ISSPECIAL(cc))
          {
            if (lastspecialrange_length == 0)
            {
              GT_GETNEXTFREEINARRAY(srptr,&bare_encseq->specialranges,
                                    GtBareSpecialrange,128UL);
              srptr->start = (GtUword) (writeptr - filecontents);
            }
            lastspecialrange_length++;
            bare_encseq->specialcharacters++;
          } else
          {
            gt_assert((GtUword) cc < bare_encseq->numofchars);
            bare_encseq->charcount[(int) cc]++;
            if (lastspecialrange_length > 0)
            {
              gt_assert(srptr != NULL);
              srptr->length = lastspecialrange_length;
            }
            lastspecialrange_length = 0;
          }
          *writeptr++ = cc;
        }
        readptr++;
      }
    }
    /* step over the newline; when the input ends without one, readptr is
       already at endptr and must not be advanced any further */
    if (readptr < endptr)
    {
      readptr++;
    }
  }
  if (lastspecialrange_length > 0)
  {
    /* the input ends inside a special range */
    gt_assert(srptr != NULL);
    srptr->length = lastspecialrange_length;
  }
  bare_encseq->sequence = filecontents;
  bare_encseq->totallength = (GtUword) (writeptr - filecontents);
  if (haserr)
  {
    gt_bare_encseq_delete(bare_encseq);
    return NULL;
  }
  return bare_encseq;
}