static int outputsortedstring2indexviafileptr(const GtEncseq *encseq, GtUword mersize, GtUchar *bytebuffer, GtUword sizeofbuffer, FILE *merindexfpout, FILE *countsfilefpout, GtUword position, GtUword countocc, GtArrayLargecount *largecounts, GtUword countoutputmers, GT_UNUSED GtError *err) { gt_encseq_sequence2bytecode(bytebuffer,encseq,position,mersize); gt_xfwrite(bytebuffer, sizeof (*bytebuffer), (size_t) sizeofbuffer, merindexfpout); if (countsfilefpout != NULL) { GtUchar smallcount; if (countocc <= MAXSMALLMERCOUNT) { smallcount = (GtUchar) countocc; } else { Largecount *lc; GT_GETNEXTFREEINARRAY(lc,largecounts,Largecount,32); lc->idx = countoutputmers; lc->value = countocc; smallcount = 0; } gt_xfwrite(&smallcount, sizeof (smallcount),(size_t) 1,countsfilefpout); } return 0; }
/* Write <cntlist> to <file> in the bit format: the GT_CNTLIST_BIT_HEADER
   character, one byte holding sizeof (GtUword), the value <nofreads>, and
   finally GT_NUMOFINTSFORBITS(nofreads) words of the bit sequence.
   <file> must not be NULL. */
static inline void gt_cntlist_show_bit(GtBitsequence *cntlist,
                                       GtUword nofreads,
                                       FILE *file)
{
  gt_assert(file != NULL);

  /* header: format tag, word size, number of reads */
  gt_xfputc(GT_CNTLIST_BIT_HEADER, file);
  gt_xfputc((char) sizeof (GtUword), file);
  gt_xfwrite(&nofreads, sizeof (GtUword), (size_t) 1, file);

  /* payload: the bit sequence itself */
  gt_xfwrite(cntlist,
             sizeof (GtBitsequence),
             GT_NUMOFINTSFORBITS(nofreads),
             file);
}
/* FIXME: convert to platform-independent variant */
/* Write <rangeList> to <fp>: first the numRanges member in its in-memory
   representation, then numRanges elements of the ranges array.
   Both arguments must be non-NULL. Always returns 1. */
int SRLSaveToStream(struct seqRangeList *rangeList, FILE *fp)
{
  size_t count;

  gt_assert(rangeList && fp);
  count = rangeList->numRanges;
  gt_xfwrite(&(rangeList->numRanges), sizeof (rangeList->numRanges), 1, fp);
  gt_xfwrite(rangeList->ranges, sizeof (struct seqRange), count, fp);
  return 1;
}
/* Print sequence number <seqnum> of <encseq> to stdout as a FASTA record:
   the separator character, the description (only if <encseq> has
   description support), a newline, all decoded characters of the sequence
   in forward direction and a final newline. */
static void gt_seqorder_output(unsigned long seqnum, GtEncseq *encseq)
{
  GtEncseqReader *reader;
  unsigned long startpos, remaining;

  startpos = gt_encseq_seqstartpos(encseq, seqnum);
  remaining = gt_encseq_seqlength(encseq, seqnum);

  /* header line */
  gt_xfputc(GT_FASTA_SEPARATOR, stdout);
  if (gt_encseq_has_description_support(encseq))
  {
    unsigned long desclen = 0;
    const char *desc = gt_encseq_description(encseq, &desclen, seqnum);

    gt_xfwrite(desc, (size_t) 1, (size_t) desclen, stdout);
  }
  gt_xfputc('\n', stdout);

  /* sequence line */
  reader = gt_encseq_create_reader_with_readmode(encseq,
                                                 GT_READMODE_FORWARD,
                                                 startpos);
  while (remaining > 0)
  {
    gt_xfputc(gt_encseq_reader_next_decoded_char(reader), stdout);
    remaining--;
  }
  gt_encseq_reader_delete(reader);
  gt_xfputc('\n', stdout);
}
/* Write the header of a binary contained-reads list to <file>: the
   GT_CNTLIST_BIN_HEADER character, one byte holding sizeof (GtUword) and
   the value <nofreads> in its in-memory representation.
   <file> must not be NULL. */
void gt_cntlist_write_bin_header(GtUword nofreads, FILE *file)
{
  const char wordsize = (char) sizeof (GtUword);

  gt_assert(file != NULL);
  gt_xfputc(GT_CNTLIST_BIN_HEADER, file);
  gt_xfputc(wordsize, file);
  gt_xfwrite(&nofreads, sizeof (GtUword), (size_t) 1, file);
}
/* Write <many> zero bytes to <outfplcptab> and return <many>. The zeros are
   taken from a zero-initialized buffer of GT_LCPBUF_NUMBEROFZEROS bytes:
   first as many full buffers as fit into <many>, then the remaining
   many % GT_LCPBUF_NUMBEROFZEROS bytes in one final write. */
static GtUword outmany0lcpvalues(GtUword many, FILE *outfplcptab)
{
#define GT_LCPBUF_NUMBEROFZEROS 1024
  uint8_t zeros[GT_LCPBUF_NUMBEROFZEROS] = {0};
  GtUword fullblocks = many/GT_LCPBUF_NUMBEROFZEROS;

  while (fullblocks > 0)
  {
    gt_xfwrite(zeros,sizeof (uint8_t),(size_t) GT_LCPBUF_NUMBEROFZEROS,
               outfplcptab);
    fullblocks--;
  }
  /* the remainder; this write is issued even if the remainder is 0 */
  gt_xfwrite(zeros,sizeof (uint8_t),
             (size_t) many % GT_LCPBUF_NUMBEROFZEROS,
             outfplcptab);
  return many;
}
/* Write the first nextfree elements of the buffer spaceuint32_t of
   <leftborderbuffer> to its file pointer, add that number to totalwrite
   and mark the buffer as empty by resetting nextfree to 0. */
void gt_leftborderbuffer_flush(GtLeftborderOutbuffer *leftborderbuffer)
{
  gt_xfwrite(leftborderbuffer->spaceuint32_t,
             sizeof (leftborderbuffer->spaceuint32_t[0]),
             (size_t) leftborderbuffer->nextfree,
             leftborderbuffer->fp);
  leftborderbuffer->totalwrite
    = leftborderbuffer->totalwrite + leftborderbuffer->nextfree;
  leftborderbuffer->nextfree = 0;
}
/* Store <pckbuckettable> in the file obtained from <indexname> and the
   suffix PCKBUCKETTABLE: first maxdepth as one value of type Seqpos, then
   maxnumofvalues elements of type Mbtab starting at mbtab[0].
   Returns 0 on success and -1 if the file cannot be opened (opensfxfile
   is given <err>). */
int pckbucket2file(const GtStr *indexname,
                   const Pckbuckettable *pckbuckettable,
                   GtError *err)
{
  FILE *outfp;
  Seqpos maxdepth;

  gt_error_check(err);
  outfp = opensfxfile(indexname,PCKBUCKETTABLE,"wb",err);
  if (outfp == NULL)
  {
    return -1;
  }
  maxdepth = (Seqpos) pckbuckettable->maxdepth;
  gt_xfwrite(&maxdepth,sizeof (Seqpos),(size_t) 1,outfp);
  gt_xfwrite(pckbuckettable->mbtab[0],
             sizeof (Mbtab),
             (size_t) pckbuckettable->maxnumofvalues,
             outfp);
  gt_fa_fclose(outfp);
  return 0;
}
/* Write <nbytes> bytes from <buf> to <file>. A NULL <file> means stdout;
   otherwise the write function matching the mode of <file> (uncompressed,
   gzip or bzip2) is used. Any other mode triggers an assertion. */
void gt_file_xwrite(GtFile *file, void *buf, size_t nbytes)
{
  if (file == NULL)
  {
    gt_xfwrite(buf, 1, nbytes, stdout);
  }
  else if (file->mode == GT_FILE_MODE_UNCOMPRESSED)
  {
    gt_xfwrite(buf, 1, nbytes, file->fileptr.file);
  }
  else if (file->mode == GT_FILE_MODE_GZIP)
  {
    gt_xgzwrite(file->fileptr.gzfile, buf, nbytes);
  }
  else if (file->mode == GT_FILE_MODE_BZIP2)
  {
    gt_xbzwrite(file->fileptr.bzfile, buf, nbytes);
  }
  else
  {
    gt_assert(0);
  }
}
/* Move the table referenced via <usedptrptr> from memory into a temporary
   file: a new filename string is created and filled by gt_xtmpfp, then
   numofunits units of sizeofunit bytes are written to that file, the table
   is freed and the caller's pointer is set to NULL. The address of the
   caller's pointer is remembered in sfxmappedrange->usedptrptr.
   <type> selects which member of <usedptrptr> is used; <writable> is
   stored in <sfxmappedrange>. */
static void gt_Sfxmappedrange_storetmp(GtSfxmappedrange *sfxmappedrange,
                                       GtSfxStoretype usedptrptr,
                                       GtSfxmappedrangetype type,
                                       bool writable)
{
  FILE *tmpfp;

  gt_assert(sfxmappedrange != NULL);
  sfxmappedrange->writable = writable;
  sfxmappedrange->ptr = NULL;
  sfxmappedrange->filename = gt_str_new();
  tmpfp = gt_xtmpfp(sfxmappedrange->filename);
  gt_assert(tmpfp != NULL);
  gt_log_log("write %s to file %s ("GT_WU" units of "GT_WU" bytes)",
             gt_str_get(sfxmappedrange->tablename),
             gt_str_get(sfxmappedrange->filename),
             (GtUword) sfxmappedrange->numofunits,
             (GtUword) sfxmappedrange->sizeofunit);
  /* each case: dump the table, remember where the caller keeps its pointer,
     release the memory and clear the caller's pointer */
  switch (type)
  {
    case GtSfxGtBitsequence:
      gt_xfwrite(*(usedptrptr.bs),
                 sfxmappedrange->sizeofunit,
                 sfxmappedrange->numofunits,
                 tmpfp);
      sfxmappedrange->usedptrptr = (void**) usedptrptr.bs;
      gt_free(*(usedptrptr.bs));
      *(usedptrptr.bs) = NULL;
      break;
    case GtSfxunsignedlong:
      gt_xfwrite(*(usedptrptr.ulong),
                 sfxmappedrange->sizeofunit,
                 sfxmappedrange->numofunits,
                 tmpfp);
      sfxmappedrange->usedptrptr = (void**) usedptrptr.ulong;
      gt_free(*(usedptrptr.ulong));
      *(usedptrptr.ulong) = NULL;
      break;
    case GtSfxuint32_t:
      gt_xfwrite(*(usedptrptr.uint32),
                 sfxmappedrange->sizeofunit,
                 sfxmappedrange->numofunits,
                 tmpfp);
      sfxmappedrange->usedptrptr = (void**) usedptrptr.uint32;
      gt_free(*(usedptrptr.uint32));
      *(usedptrptr.uint32) = NULL;
      break;
  }
  gt_fa_fclose(tmpfp);
}
/* Write the string representation of <alphabet>, as produced by
   gt_alphabet_to_str, to <fpout>. Both arguments must be non-NULL. */
void gt_alphabet_output(const GtAlphabet *alphabet, FILE *fpout)
{
  GtStr *repr;

  gt_assert(alphabet && fpout);
  repr = gt_str_new();
  gt_alphabet_to_str(alphabet, repr);
  gt_xfwrite(gt_str_get(repr),
             sizeof (char),
             (size_t) gt_str_length(repr),
             fpout);
  gt_str_delete(repr);
}
/* Add <numoflcps> to the counter countoutputlcpvalues of <lcp2file> and
   write the first <numoflcps> elements of its smalllcpvalues buffer to
   its file pointer outfplcptab. Both <lcp2file> and that file pointer
   must be non-NULL. */
static void outsmalllcpvalues(Lcpoutput2file *lcp2file, GtUword numoflcps)
{
  gt_assert (lcp2file != NULL);
  lcp2file->countoutputlcpvalues
    = lcp2file->countoutputlcpvalues + numoflcps;
  gt_assert(lcp2file->outfplcptab != NULL);
  gt_xfwrite(lcp2file->smalllcpvalues,
             sizeof (lcp2file->smalllcpvalues[0]),
             (size_t) numoflcps,
             lcp2file->outfplcptab);
}
/* Store <pckbuckettable> in the file <indexname> with suffix
   PCKBUCKETTABLE: first maxdepth as one unsigned long, then maxnumofvalues
   elements of type Mbtab starting at mbtab[0].
   Returns 0 on success and -1 if the file cannot be opened
   (gt_fa_fopen_with_suffix is given <err>). */
int gt_pckbuckettable_2file(const char *indexname,
                            const Pckbuckettable *pckbuckettable,
                            GtError *err)
{
  FILE *outfp;
  unsigned long maxdepth;

  gt_error_check(err);
  outfp = gt_fa_fopen_with_suffix(indexname,PCKBUCKETTABLE,"wb",err);
  if (outfp == NULL)
  {
    return -1;
  }
  maxdepth = (unsigned long) pckbuckettable->maxdepth;
  gt_xfwrite(&maxdepth,sizeof (unsigned long),(size_t) 1,outfp);
  gt_xfwrite(pckbuckettable->mbtab[0],
             sizeof (Mbtab),
             (size_t) pckbuckettable->maxnumofvalues,
             outfp);
  gt_fa_fclose(outfp);
  return 0;
}
/* Write <numberofsuffixes> elements of the table stored in <sssp> to
   <outfpsuftab>. The table ulongtab is used if it is not NULL, otherwise
   uinttab; the element size is taken from the table actually written. */
void gt_suffixsortspace_to_file (FILE *outfpsuftab,
                                 const GtSuffixsortspace *sssp,
                                 unsigned long numberofsuffixes)
{
  void *table;
  size_t basesize;

  if (sssp->ulongtab != NULL)
  {
    table = (void *) sssp->ulongtab;
    basesize = sizeof (*sssp->ulongtab);
  } else
  {
    table = (void *) sssp->uinttab;
    basesize = sizeof (*sssp->uinttab);
  }
  gt_xfwrite(table, basesize, (size_t) numberofsuffixes, outfpsuftab);
}
void gt_bitoutstream_append(GtBitOutStream *bitstream, GtBitsequence code, unsigned long bits2write) { if (bitstream->bits_left < bits2write) { unsigned int overhang = 0; overhang = bits2write - bitstream->bits_left; bitstream->bitseqbuffer |= code >> overhang; gt_xfwrite(&bitstream->bitseqbuffer, sizeof (GtBitsequence), 1, bitstream->fp); bitstream->bitseqbuffer = 0; bitstream->bits_left = GT_INTWORDSIZE - overhang; }
/* If <byteoffset> is not a multiple of GT_WORDSIZE_INBYTES, write as many
   zero bytes to <fp> as are needed to reach the next multiple. The number
   of bytes written (0 if no padding is needed) is stored in
   <*bytes_written>. <err> is unused; the function always returns 0. */
int gt_mapspec_pad(FILE *fp, GtUword *bytes_written, GtUword byteoffset,
                   GT_UNUSED GtError *err)
{
  /* at most GT_WORDSIZE_INBYTES-1 pad bytes are ever required */
  GtUchar padbuffer[GT_WORDSIZE_INBYTES-1] = {0};
  size_t padunits;
  const GtUword misaligned = byteoffset % (GtUword) GT_WORDSIZE_INBYTES;

  if (misaligned == 0)
  {
    *bytes_written = 0;
    return 0;
  }
  padunits = GT_WORDSIZE_INBYTES - misaligned;
  gt_xfwrite(padbuffer,sizeof (GtUchar),padunits,fp);
  *bytes_written = (GtUword) padunits;
  return 0;
}
/* Write sequence <seqnum> of <encseq> to <fpout> as a FASTA record.

   The header line consists of '>', followed - only if <fastakeyquery> is
   not NULL and not COMPLETE - by the key and the 1-based range
   "<fastakey> <frompos> <topos> ", followed by the description of the
   sequence. The sequence itself is written by gt_encseq2symbolstring in
   forward direction with the given <linewidth>: the whole sequence, or,
   for an incomplete query, the topos - frompos + 1 characters starting at
   offset frompos - 1 of the sequence.

   The complete record goes to <fpout>; the key/range prefix used to be
   printed to stdout, which split the header whenever <fpout> was a
   different stream.

   <err> is unused; the function always returns 0. */
static int giextract_encodedseq2fasta(FILE *fpout,
                                      const GtEncseq *encseq,
                                      unsigned long seqnum,
                                      const Fastakeyquery *fastakeyquery,
                                      unsigned long linewidth,
                                      GT_UNUSED GtError *err)
{
  const char *desc;
  unsigned long desclen, offset, length, seqstartpos, seqlength;
  const bool partial = fastakeyquery != NULL && !COMPLETE(fastakeyquery);

  /* header line */
  desc = gt_encseq_description(encseq, &desclen, seqnum);
  gt_xfputc('>',fpout);
  if (partial)
  {
    fprintf(fpout,"%s %lu %lu ",fastakeyquery->fastakey,
                                fastakeyquery->frompos,
                                fastakeyquery->topos);
  }
  gt_xfwrite(desc,sizeof *desc,(size_t) desclen,fpout);
  gt_xfputc('\n',fpout);

  /* sequence lines */
  seqstartpos = gt_encseq_seqstartpos(encseq, seqnum);
  seqlength = gt_encseq_seqlength(encseq, seqnum);
  if (partial)
  {
    /* convert the 1-based inclusive range into offset and length */
    offset = fastakeyquery->frompos - 1;
    length = fastakeyquery->topos - fastakeyquery->frompos + 1;
  } else
  {
    offset = 0;
    length = seqlength;
  }
  gt_encseq2symbolstring(fpout,
                         encseq,
                         GT_READMODE_FORWARD,
                         seqstartpos + offset,
                         length,
                         linewidth);
  return 0;
}
static void bssm_param_plain_write(const GthBSSMParam *bssm_param, FILE *outfp) { GtStr *str; gt_assert(bssm_param && outfp); str = gt_str_new(); gt_str_append_cstr(str, "BSSM = {\n"); if (bssm_param->gt_donor_model_set) { write_model(str, "gt_donor_model", &bssm_param->gt_donor_model); gt_str_append_cstr(str, ",\n"); } if (bssm_param->gc_donor_model_set) { write_model(str, "gc_donor_model", &bssm_param->gc_donor_model); gt_str_append_cstr(str, ",\n"); } if (bssm_param->ag_acceptor_model_set) { write_model(str, "ag_acceptor_model", &bssm_param->ag_acceptor_model); gt_str_append_char(str, '\n'); } gt_str_append_cstr(str, "}\n"); gt_xfwrite(gt_str_get(str), sizeof (char), gt_str_length(str), outfp); gt_str_delete(str); }
/* Write the string representation of <alphabet>, as produced by
   gt_alphabet_to_str, to the file <indexname> with suffix
   GT_ALPHABETFILESUFFIX. Returns 0 on success and -1 if the file cannot
   be opened (gt_fa_fopen_with_suffix is given <err>). */
int gt_alphabet_to_file(const GtAlphabet *alphabet, const char *indexname,
                        GtError *err)
{
  FILE *al1fp;
  GtStr *repr;

  gt_error_check(err);
  al1fp = gt_fa_fopen_with_suffix(indexname,GT_ALPHABETFILESUFFIX,"wb",err);
  if (al1fp == NULL)
  {
    return -1;
  }
  repr = gt_str_new();
  gt_alphabet_to_str(alphabet, repr);
  gt_xfwrite(gt_str_get(repr),
             sizeof (char),
             (size_t) gt_str_length(repr),
             al1fp);
  gt_fa_xfclose(al1fp);
  gt_str_delete(repr);
  return 0;
}
/* Decode <encseq> to stdout according to <args>.

   Mode "fasta": print one FASTA record per selected sequence. The
   selection is the single sequence args->seq, the inclusive range
   args->seqrng, or all sequences if neither is defined. If <encseq> has no
   description support, a warning mentioning <filename> is issued and
   "sequence <number>" is used as description.

   Mode "concat": print the positions args->rng (or the whole encoded
   sequence if no range is defined) on one line, replacing the separator
   symbol by the first character of args->sepchar.

   Characters are fetched one by one via random access if args->singlechars
   is set, otherwise via a sequential reader.

   Returns 0 on success and -1 (with <err> set) if a requested sequence
   number or position is out of range. */
static int output_sequence(GtEncseq *encseq, GtEncseqDecodeArguments *args,
                           const char *filename, GtError *err)
{
  GtUword i, j, sfrom, sto;
  int had_err = 0;
  bool has_desc;
  GtEncseqReader *esr;
  const char *mode;

  gt_assert(encseq);
  has_desc = gt_encseq_has_description_support(encseq);
  if (!has_desc)
    gt_warning("Missing description support for file %s", filename);
  mode = gt_str_get(args->mode);

  if (strcmp(mode, "fasta") == 0)
  {
    const GtUword numofseqs = gt_encseq_num_of_sequences(encseq);

    /* determine the half-open interval [sfrom, sto) of sequence numbers */
    if (args->seq != GT_UNDEF_UWORD)
    {
      /* a single sequence was requested */
      if (args->seq >= numofseqs)
      {
        gt_error_set(err,
                     "requested sequence "GT_WU" exceeds number of sequences "
                     "("GT_WU")", args->seq, numofseqs);
        return -1;
      }
      sfrom = args->seq;
      sto = args->seq + 1;
    }
    else if (args->seqrng.start != GT_UNDEF_UWORD
               && args->seqrng.end != GT_UNDEF_UWORD)
    {
      /* a range of sequences was requested */
      if (args->seqrng.start >= numofseqs || args->seqrng.end >= numofseqs)
      {
        gt_error_set(err,
                     "range "GT_WU"-"GT_WU" includes a sequence number "
                     "exceeding the total number of sequences ("GT_WU")",
                     args->seqrng.start, args->seqrng.end, numofseqs);
        return -1;
      }
      sfrom = args->seqrng.start;
      sto = args->seqrng.end + 1;
    }
    else
    {
      /* no restriction: all sequences */
      sfrom = 0;
      sto = numofseqs;
    }

    for (i = sfrom; i < sto; i++)
    {
      GtUword desclen, startpos, len;
      char buf[BUFSIZ];
      const char *desc = NULL;
      const bool reverse = GT_ISDIRREVERSE(args->rm) ? true : false;
      /* in the reverse directions record i is taken from the sequence
         mirrored at the end of the collection */
      const GtUword srcseq = reverse ? numofseqs - 1 - i : i;

      /* XXX: maybe make this distinction in the functions via readmode? */
      len = gt_encseq_seqlength(encseq, srcseq);
      if (reverse)
      {
        startpos = gt_encseq_total_length(encseq)
                   - (gt_encseq_seqstartpos(encseq, srcseq) + len);
      } else
      {
        startpos = gt_encseq_seqstartpos(encseq, srcseq);
      }
      if (has_desc)
      {
        desc = gt_encseq_description(encseq, &desclen, srcseq);
      } else
      {
        /* the replacement description always carries the record number i */
        (void) snprintf(buf, BUFSIZ, "sequence "GT_WU"", i);
        desclen = strlen(buf);
        desc = buf;
      }
      gt_assert(desc);

      /* output description */
      gt_xfputc(GT_FASTA_SEPARATOR, stdout);
      gt_xfwrite(desc, 1, desclen, stdout);
      gt_xfputc('\n', stdout);

      /* XXX: make this more efficient by writing in a buffer first and
         then showing the result */
      if (args->singlechars)
      {
        for (j = 0; j < len; j++)
        {
          gt_xfputc(gt_encseq_get_decoded_char(encseq, startpos + j,
                                               args->rm),
                    stdout);
        }
      } else
      {
        esr = gt_encseq_create_reader_with_readmode(encseq, args->rm,
                                                    startpos);
        for (j = 0; j < len; j++)
        {
          gt_xfputc(gt_encseq_reader_next_decoded_char(esr), stdout);
        }
        gt_encseq_reader_delete(esr);
      }
      gt_xfputc('\n', stdout);
    }
  }
  else if (strcmp(mode, "concat") == 0)
  {
    /* inclusive interval [from, to] of positions to print */
    GtUword from = 0,
            to = gt_encseq_total_length(encseq) - 1;

    if (args->rng.start != GT_UNDEF_UWORD && args->rng.end != GT_UNDEF_UWORD)
    {
      if (args->rng.end > to)
      {
        had_err = -1;
        gt_error_set(err,
                     "end of range ("GT_WU") exceeds encoded sequence length "
                     "("GT_WU")", args->rng.end, to);
      } else
      {
        from = args->rng.start;
        to = args->rng.end;
      }
    }
    if (!had_err)
    {
      if (args->singlechars)
      {
        for (j = from; j <= to; j++)
        {
          char cc = gt_encseq_get_decoded_char(encseq, j, args->rm);

          if (cc == (char) SEPARATOR)
            cc = gt_str_get(args->sepchar)[0];
          gt_xfputc(cc, stdout);
        }
      } else
      {
        esr = gt_encseq_create_reader_with_readmode(encseq, args->rm, from);
        if (esr)
        {
          for (j = from; j <= to; j++)
          {
            char cc = gt_encseq_reader_next_decoded_char(esr);

            if (cc == (char) SEPARATOR)
              cc = gt_str_get(args->sepchar)[0];
            gt_xfputc(cc, stdout);
          }
          gt_encseq_reader_delete(esr);
        }
      }
      gt_xfputc('\n', stdout);
    }
  }
  return had_err;
}
static int enumeratelcpintervals(const char *inputindex, Sequentialsuffixarrayreader *ssar, const char *storeindex, bool storecounts, GtUword mersize, GtUword minocc, GtUword maxocc, bool performtest, GtLogger *logger, GtError *err) { TyrDfsstate *state; bool haserr = false; unsigned int alphasize; gt_error_check(err); state = gt_malloc(sizeof (*state)); GT_INITARRAY(&state->occdistribution,Countwithpositions); state->esrspace = gt_encseq_create_reader_with_readmode( gt_encseqSequentialsuffixarrayreader(ssar), gt_readmodeSequentialsuffixarrayreader(ssar), 0); state->mersize = (GtUword) mersize; state->encseq = gt_encseqSequentialsuffixarrayreader(ssar); alphasize = gt_alphabet_num_of_chars(gt_encseq_alphabet(state->encseq)); state->readmode = gt_readmodeSequentialsuffixarrayreader(ssar); state->storecounts = storecounts; state->minocc = minocc; state->maxocc = maxocc; state->totallength = gt_encseq_total_length(state->encseq); state->performtest = performtest; state->countoutputmers = 0; state->merindexfpout = NULL; state->countsfilefpout = NULL; GT_INITARRAY(&state->largecounts,Largecount); if (strlen(storeindex) == 0) { state->sizeofbuffer = 0; state->bytebuffer = NULL; } else { state->sizeofbuffer = MERBYTES(mersize); state->bytebuffer = gt_malloc(sizeof *state->bytebuffer * state->sizeofbuffer); } if (performtest) { state->currentmer = gt_malloc(sizeof *state->currentmer * state->mersize); state->suftab = gt_suftabSequentialsuffixarrayreader(ssar); } else { state->currentmer = NULL; state->suftab = NULL; } if (state->mersize > state->totallength) { gt_error_set(err,"mersize "GT_WU" > "GT_WU" = totallength not allowed", state->mersize, state->totallength); haserr = true; } else { if (strlen(storeindex) == 0) { state->processoccurrencecount = adddistpos2distribution; } else { state->merindexfpout = gt_fa_fopen_with_suffix(storeindex,MERSUFFIX, "wb",err); if (state->merindexfpout == NULL) { haserr = true; } else { if (state->storecounts) { state->countsfilefpout = 
gt_fa_fopen_with_suffix(storeindex,COUNTSSUFFIX,"wb",err); if (state->countsfilefpout == NULL) { haserr = true; } } } state->processoccurrencecount = outputsortedstring2index; } if (!haserr) { if (gt_depthfirstesa(ssar, tyr_allocateDfsinfo, tyr_freeDfsinfo, tyr_processleafedge, NULL, tyr_processcompletenode, tyr_assignleftmostleaf, tyr_assignrightmostleaf, (Dfsstate*) state, logger, err) != 0) { haserr = true; } if (strlen(storeindex) == 0) { showfinalstatistics(state,inputindex,logger); } } if (!haserr) { if (state->countsfilefpout != NULL) { gt_logger_log(logger,"write "GT_WU" mercounts > "GT_WU " to file \"%s%s\"", state->largecounts.nextfreeLargecount, (GtUword) MAXSMALLMERCOUNT, storeindex, COUNTSSUFFIX); gt_xfwrite(state->largecounts.spaceLargecount, sizeof (Largecount), (size_t) state->largecounts.nextfreeLargecount, state->countsfilefpout); } } if (!haserr) { gt_logger_log(logger,"number of "GT_WU"-mers in index: "GT_WU"", mersize, state->countoutputmers); gt_logger_log(logger,"index size: %.2f megabytes\n", GT_MEGABYTES(state->countoutputmers * state->sizeofbuffer + sizeof (GtUword) * EXTRAINTEGERS)); } } /* now out EXTRAINTEGERS integer values */ if (!haserr && state->merindexfpout != NULL) { outputbytewiseUlongvalue(state->merindexfpout, (GtUword) state->mersize); outputbytewiseUlongvalue(state->merindexfpout,(GtUword) alphasize); } gt_fa_xfclose(state->merindexfpout); gt_fa_xfclose(state->countsfilefpout); GT_FREEARRAY(&state->occdistribution,Countwithpositions); gt_free(state->currentmer); gt_free(state->bytebuffer); GT_FREEARRAY(&state->largecounts,Largecount); gt_encseq_reader_delete(state->esrspace); gt_free(state); return haserr ? -1 : 0; }
/* Output the first <width> lcp values of the table of <lcpsubtab> via its
   lcp2file component. Each value smaller than LCPOVERFLOW is stored as one
   byte in the smalllcpvalues buffer; every other value is stored there as
   LCPOVERFLOW and additionally recorded, together with its position
   <posoffset> + index, in the largelcpvalues array. The small values are
   written by outsmalllcpvalues, the large ones (if any) to outfpllvtab.
   Along the way maxbranchdepth, lcptabsum, totalnumoflargelcpvalues and,
   if present, the distribution distlcpvalues are updated. */
static void outlcpvalues(Lcpsubtab *lcpsubtab, GtUword width,
                         GtUword posoffset)
{
  GtUword i;

  gt_assert(lcpsubtab != NULL && lcpsubtab->lcp2file != NULL);
  lcpsubtab->lcp2file->largelcpvalues.nextfreeLargelcpvalue = 0;

  /* make sure the array of large values can hold all of them */
  if (lcpsubtab->tableoflcpvalues.numoflargelcpvalues > 0 &&
      lcpsubtab->tableoflcpvalues.numoflargelcpvalues >=
      lcpsubtab->lcp2file->largelcpvalues.allocatedLargelcpvalue)
  {
    lcpsubtab->lcp2file->largelcpvalues.spaceLargelcpvalue
      = gt_realloc(lcpsubtab->lcp2file->largelcpvalues.spaceLargelcpvalue,
                   sizeof (*lcpsubtab->lcp2file->largelcpvalues.
                           spaceLargelcpvalue) *
                   lcpsubtab->tableoflcpvalues.numoflargelcpvalues);
    lcpsubtab->lcp2file->largelcpvalues.allocatedLargelcpvalue
      = lcpsubtab->tableoflcpvalues.numoflargelcpvalues;
  }

  for (i = 0; i < width; i++)
  {
    const GtUword lcp = gt_lcptab_getvalue(&lcpsubtab->tableoflcpvalues,0,i);

    if (lcp > lcpsubtab->lcp2file->maxbranchdepth)
    {
      lcpsubtab->lcp2file->maxbranchdepth = lcp;
    }
    if (lcp >= (GtUword) LCPOVERFLOW)
    {
      Largelcpvalue *slot;

      gt_assert(lcpsubtab->lcp2file->largelcpvalues.nextfreeLargelcpvalue <
                lcpsubtab->lcp2file->largelcpvalues.allocatedLargelcpvalue);
      slot = lcpsubtab->lcp2file->largelcpvalues.spaceLargelcpvalue +
             lcpsubtab->lcp2file->largelcpvalues.nextfreeLargelcpvalue++;
      slot->position = posoffset + i;
      slot->value = lcp;
      lcpsubtab->lcp2file->smalllcpvalues[i] = LCPOVERFLOW;
    } else
    {
      lcpsubtab->lcp2file->smalllcpvalues[i] = (uint8_t) lcp;
    }
    lcpsubtab->lcptabsum += (double) lcp;
    if (lcpsubtab->distlcpvalues != NULL)
    {
      gt_disc_distri_add(lcpsubtab->distlcpvalues, lcp);
    }
  }

  outsmalllcpvalues(lcpsubtab->lcp2file,width);
  if (lcpsubtab->lcp2file->largelcpvalues.nextfreeLargelcpvalue > 0)
  {
    lcpsubtab->lcp2file->totalnumoflargelcpvalues
      += lcpsubtab->lcp2file->largelcpvalues.nextfreeLargelcpvalue;
    gt_assert(lcpsubtab->lcp2file->outfpllvtab != NULL);
    gt_xfwrite(lcpsubtab->lcp2file->largelcpvalues.spaceLargelcpvalue,
               sizeof (*lcpsubtab->lcp2file->largelcpvalues.
                       spaceLargelcpvalue),
               (size_t) lcpsubtab->lcp2file->largelcpvalues.
                        nextfreeLargelcpvalue,
               lcpsubtab->lcp2file->outfpllvtab);
  }
}
/* Read the description table <indexname>GT_DESTABFILESUFFIX line by line
   and extract one key per line with desc2key. All keys must be non-empty
   and of the same length, which must not exceed CHAR_MAX.

   If <sortkeys> is false, the keys are written to the file
   <indexname>GT_KEYSTABFILESUFFIX: one byte holding the key length,
   followed by each key terminated by '\0'. In this mode each key must be
   lexicographically larger than its predecessor.

   If <sortkeys> is true, the encoded sequence <indexname> is loaded, the
   keys (at most MAXFIXEDKEYSIZE characters long) are collected together
   with their line numbers, sorted with compareFixedkeys, and the sequences
   are written to stdout as FASTA in the order of the sorted keys. The key
   table has one slot per sequence of the index; a description table with
   more lines than that is reported as an error instead of writing beyond
   the end of the table.

   Returns 0 on success and -1 on failure, in which case <err> is set
   by this function or by the failing callee. */
int gt_extractkeysfromdesfile(const char *indexname,
                              bool sortkeys,
                              GtLogger *logger,
                              GtError *err)
{
  FILE *fpin, *fpout = NULL;
  GtStr *line = NULL;
  const char *keyptr;
  unsigned long keylen, constantkeylen = 0, linenum;
  bool haserr = false, firstdesc = true;
  char *previouskey = NULL;
  Fixedsizekey *keytab = NULL, *keytabptr = NULL;
  GtEncseq *encseq = NULL;
  unsigned long numofentries = 0;
  const unsigned long linewidth = 60UL;

  fpin = gt_fa_fopen_with_suffix(indexname,GT_DESTABFILESUFFIX,"rb",err);
  if (fpin == NULL)
  {
    return -1;
  }
  if (!sortkeys)
  {
    fpout = gt_fa_fopen_with_suffix(indexname,GT_KEYSTABFILESUFFIX,"wb",err);
    if (fpout == NULL)
    {
      haserr = true;
    }
  }
  if (!haserr)
  {
    line = gt_str_new();
  }
  for (linenum = 0;
       !haserr && gt_str_read_next_line(line, fpin) != EOF;
       linenum++)
  {
    keyptr = desc2key(&keylen,gt_str_get(line),err);
    if (keyptr == NULL)
    {
      haserr = true;
      break;
    }
    if (keylen == 0)
    {
      gt_error_set(err,"key of length 0 in \"%s\" not expected",
                   gt_str_get(line));
      haserr = true;
      break;
    }
    if (firstdesc)
    {
      /* the first key fixes the length all other keys must have */
      if (keylen > (unsigned long) CHAR_MAX)
      {
        gt_error_set(err,"key \"%*.*s\" of length %lu not allowed; "
                         "no key must be larger than %d",
                     (int) keylen,(int) keylen,keyptr,keylen,CHAR_MAX);
        haserr = true;
        break;
      }
      constantkeylen = keylen;
      previouskey = gt_malloc(sizeof (char) * (constantkeylen+1));
      firstdesc = false;
      if (!sortkeys)
      {
        gt_xfputc((char) constantkeylen,fpout);
      } else
      {
        GtEncseqLoader *el;

        if (constantkeylen > (unsigned long) MAXFIXEDKEYSIZE)
        {
          gt_error_set(err,"key \"%*.*s\" of length %lu not allowed; "
                           "no key must be larger than %d",
                       (int) keylen,(int) keylen,keyptr,keylen,
                       MAXFIXEDKEYSIZE);
          haserr = true;
          break;
        }
        el = gt_encseq_loader_new();
        gt_encseq_loader_set_logger(el, logger);
        encseq = gt_encseq_loader_load(el, indexname, err);
        gt_encseq_loader_delete(el);
        if (encseq == NULL)
        {
          haserr = true;
          break;
        }
        numofentries = gt_encseq_num_of_sequences(encseq);
        gt_assert(numofentries > 0);
        keytab = gt_malloc(sizeof (*keytab) * numofentries);
        keytabptr = keytab;
      }
    } else
    {
      if (constantkeylen != keylen)
      {
        gt_error_set(err,"key \"%*.*s\" of length %lu: all keys must be of "
                         "the same length which for all previously seen "
                         "headers is %lu",
                     (int) keylen,(int) keylen,keyptr,keylen,
                     constantkeylen);
        haserr = true;
        break;
      }
      gt_assert(previouskey != NULL);
      if (!sortkeys &&
          strncmp(previouskey,keyptr,(size_t) constantkeylen) >= 0)
      {
        gt_error_set(err,"previous key \"%s\" is not lexicographically smaller "
                         "than current key \"%*.*s\"",
                     previouskey,(int) keylen,(int) keylen,keyptr);
        haserr = true;
        break;
      }
    }
    if (!sortkeys)
    {
      gt_xfwrite(keyptr,sizeof *keyptr,(size_t) keylen,fpout);
      gt_xfputc('\0',fpout);
    } else
    {
      gt_assert(keytabptr != NULL);
      /* keytab has exactly numofentries slots: refuse to store a key for
         which there is no sequence rather than overrun the table */
      if (keytabptr >= keytab + numofentries)
      {
        gt_error_set(err,"more than %lu descriptions in \"%s%s\": the number "
                         "of descriptions must equal the number of sequences",
                     numofentries,indexname,GT_DESTABFILESUFFIX);
        haserr = true;
        break;
      }
      strncpy(keytabptr->key,keyptr,(size_t) constantkeylen);
      keytabptr->key[constantkeylen] = '\0';
      keytabptr->seqnum = linenum;
      keytabptr++;
    }
    /* remember the current key for the order check of the next line */
    strncpy(previouskey,keyptr,(size_t) constantkeylen);
    previouskey[constantkeylen] = '\0';
    gt_str_reset(line);
  }
  if (!haserr)
  {
    gt_logger_log(logger,"number of keys of length %lu = %lu",
                  constantkeylen,linenum);
  }
  gt_str_delete(line);
  gt_fa_fclose(fpin);
  gt_fa_fclose(fpout);
  gt_free(previouskey);
  if (!haserr && sortkeys)
  {
    gt_assert(keytabptr != NULL);
    gt_assert(numofentries > 0);
    gt_assert(keytabptr == keytab + numofentries);
    qsort(keytab,(size_t) numofentries,sizeof (*keytab),compareFixedkeys);
    gt_assert(keytabptr != NULL);
    for (keytabptr = keytab;
         !haserr && keytabptr < keytab + numofentries;
         keytabptr++)
    {
      if (giextract_encodedseq2fasta(stdout,
                                     encseq,
                                     keytabptr->seqnum,
                                     NULL,
                                     linewidth,
                                     err) != 0)
      {
        haserr = true;
        break;
      }
    }
  }
  if (encseq != NULL)
  {
    gt_encseq_delete(encseq);
    encseq = NULL;
  }
  gt_free(keytab);
  return haserr ? -1 : 0;
}