/* Write sequence <seqnum> of <encseq> to stdout as one record: the
   GT_FASTA_SEPARATOR character, the description (only if <encseq> has
   description support), a newline, the decoded characters read in forward
   direction, and a final newline. */
static void gt_seqorder_output(unsigned long seqnum, GtEncseq *encseq)
{
  const unsigned long seqstart = gt_encseq_seqstartpos(encseq, seqnum),
                      seqlen = gt_encseq_seqlength(encseq, seqnum);
  unsigned long remaining;
  GtEncseqReader *reader;

  /* header line */
  gt_xfputc(GT_FASTA_SEPARATOR, stdout);
  if (gt_encseq_has_description_support(encseq)) {
    unsigned long desclen = 0;
    const char *desc = gt_encseq_description(encseq, &desclen, seqnum);
    gt_xfwrite(desc, (size_t) 1, (size_t) desclen, stdout);
  }
  gt_xfputc('\n', stdout);

  /* sequence line: all characters on a single line */
  reader = gt_encseq_create_reader_with_readmode(encseq, GT_READMODE_FORWARD,
                                                 seqstart);
  for (remaining = seqlen; remaining > 0; remaining--)
    gt_xfputc(gt_encseq_reader_next_decoded_char(reader), stdout);
  gt_encseq_reader_delete(reader);
  gt_xfputc('\n', stdout);
}
/* Write a binary cntlist header to <file>: the GT_CNTLIST_BIN_HEADER byte,
   one byte holding sizeof (GtUword), and then the value of <nofreads>
   written via gt_xfwrite() as a single GtUword-sized item.
   <file> must not be NULL. */
void gt_cntlist_write_bin_header(GtUword nofreads, FILE *file)
{
  const char wordsize = (char) sizeof (GtUword);

  gt_assert(file != NULL);
  gt_xfputc(GT_CNTLIST_BIN_HEADER, file);
  gt_xfputc(wordsize, file);
  gt_xfwrite(&nofreads, sizeof nofreads, (size_t) 1, file);
}
int gt_hcr_decoder_decode_range(GtHcrDecoder *hcr_dec, const char *name, GtUword start, GtUword end, GtTimer *timer, GtError *err) { char qual[BUFSIZ] = {0}, seq[BUFSIZ] = {0}; GtStr *desc = gt_str_new(); int had_err = 0; GtUword cur_width, cur_read; size_t i; FILE *output; GT_UNUSED GtHcrSeqDecoder *seq_dec; gt_error_check(err); gt_assert(hcr_dec && name); seq_dec = hcr_dec->seq_dec; gt_assert(start <= end); gt_assert(start < seq_dec->num_of_reads && end < seq_dec->num_of_reads); if (timer != NULL) gt_timer_show_progress(timer, "decode hcr", stdout); output = gt_fa_fopen_with_suffix(name, HCRFILEDECODEDSUFFIX, "w", err); if (output == NULL) had_err = -1; for (cur_read = start; had_err == 0 && cur_read <= end; cur_read++) { if (gt_hcr_decoder_decode(hcr_dec, cur_read, seq, qual, desc, err) != 0) had_err = -1; else { gt_xfputc(HCR_DESCSEPSEQ, output); if (hcr_dec->encdesc != NULL) gt_xfputs(gt_str_get(desc), output); else fprintf(output, ""GT_WU"", cur_read); gt_xfputc('\n', output); for (i = 0, cur_width = 0; i < strlen(seq); i++, cur_width++) { if (cur_width == HCR_LINEWIDTH) { cur_width = 0; gt_xfputc('\n', output); } gt_xfputc(seq[i], output); } gt_xfputc('\n', output); gt_xfputc(HCR_DESCSEPQUAL, output); gt_xfputc('\n', output); for (i = 0, cur_width = 0; i < strlen(qual); i++, cur_width++) { if (cur_width == HCR_LINEWIDTH) { cur_width = 0; gt_xfputc('\n', output); } gt_xfputc(qual[i], output); } gt_xfputc('\n', output); } } gt_fa_xfclose(output); gt_str_delete(desc); return had_err; }
/* Write <cntlist> in bit format to <file>: the GT_CNTLIST_BIT_HEADER byte,
   one byte holding sizeof (GtUword), the value of <nofreads>, and finally
   GT_NUMOFINTSFORBITS(<nofreads>) GtBitsequence items taken from <cntlist>.
   <file> must not be NULL. */
static inline void gt_cntlist_show_bit(GtBitsequence *cntlist,
                                       GtUword nofreads, FILE *file)
{
  const char wordsize = (char) sizeof (GtUword);

  gt_assert(file != NULL);

  /* header */
  gt_xfputc(GT_CNTLIST_BIT_HEADER, file);
  gt_xfputc(wordsize, file);
  gt_xfwrite(&nofreads, sizeof nofreads, (size_t) 1, file);

  /* payload */
  gt_xfwrite(cntlist, sizeof (GtBitsequence), GT_NUMOFINTSFORBITS(nofreads),
             file);
}
/* Write the first <length> characters of <cstr> to <fp>, one character at a
   time. Neither <cstr> nor <fp> may be NULL. No terminator is required or
   written. */
void gt_cstr_show(const char *cstr, GtUword length, FILE *fp)
{
  const char *cur = cstr;
  GtUword remaining = length;

  gt_assert(cstr && fp);
  while (remaining-- > 0)
    gt_xfputc(*cur++, fp);
}
/* Print the score matrix <sm> to <fp>.
   The first line starts with a blank and lists every decoded alphabet
   symbol, each preceded by a blank. Every following line starts with the
   decoded row symbol followed by the scores of that row, each printed with
   the format " %2d". Neither <sm> nor <fp> may be NULL. */
void gt_score_matrix_show(const GtScoreMatrix *sm, FILE *fp)
{
  unsigned row, col;

  gt_assert(sm && fp);

  /* alphabet line */
  gt_xfputc(' ', fp);
  col = 0;
  while (col < gt_alphabet_size(sm->alphabet)) {
    fprintf(fp, " %c", gt_alphabet_decode(sm->alphabet, col));
    col++;
  }
  gt_xfputc('\n', fp);

  /* one line per row of scores */
  row = 0;
  while (row < gt_alphabet_size(sm->alphabet)) {
    gt_xfputc(gt_alphabet_decode(sm->alphabet, row), fp);
    col = 0;
    while (col < gt_alphabet_size(sm->alphabet)) {
      fprintf(fp, " %2d", gt_score_matrix_get_score(sm, row, col));
      col++;
    }
    gt_xfputc('\n', fp);
    row++;
  }
}
/* Write the character <c> to <file>, dispatching on the mode of <file>
   (uncompressed, gzip or bzip2). If <file> is NULL, <c> is written to
   stdout. Any other mode triggers an assertion failure. */
void gt_file_xfputc(int c, GtFile *file)
{
  if (!file) {
    /* ISO C does not allow "return <expression>;" in a function returning
       void, so the call and the return are separate statements */
    gt_xfputc(c, stdout);
    return;
  }
  switch (file->mode) {
    case GT_FILE_MODE_UNCOMPRESSED:
      gt_xfputc(c, file->fileptr.file);
      break;
    case GT_FILE_MODE_GZIP:
      gt_xgzfputc(c, file->fileptr.gzfile);
      break;
    case GT_FILE_MODE_BZIP2:
      gt_xbzfputc(c, file->fileptr.bzfile);
      break;
    default:
      gt_assert(0);
  }
}
/* Write sequence <seqnum> of <encseq> to <fpout> as one record.

   The header line consists of '>', followed (only if <fastakeyquery> is
   given and not COMPLETE) by "<fastakey> <frompos> <topos> ", followed by
   the description of the sequence. The sequence itself is written by
   gt_encseq2symbolstring() with the given <linewidth>: the whole sequence
   by default, or, for an incomplete <fastakeyquery>, the part beginning at
   offset frompos-1 with topos-frompos+1 characters.

   The complete header is written to <fpout>; the key/range prefix must not
   go to stdout, since it would then be separated from the rest of the record
   whenever <fpout> is not stdout.

   <err> is unused. The function always returns 0. */
static int giextract_encodedseq2fasta(FILE *fpout,
                                      const GtEncseq *encseq,
                                      unsigned long seqnum,
                                      const Fastakeyquery *fastakeyquery,
                                      unsigned long linewidth,
                                      GT_UNUSED GtError *err)
{
  const char *desc;
  unsigned long desclen, offset, length, seqstartpos, seqlength;

  /* header line */
  desc = gt_encseq_description(encseq, &desclen, seqnum);
  gt_xfputc('>',fpout);
  if (fastakeyquery != NULL && !COMPLETE(fastakeyquery))
  {
    fprintf(fpout,"%s %lu %lu ",fastakeyquery->fastakey,
                                fastakeyquery->frompos,
                                fastakeyquery->topos);
  }
  gt_xfwrite(desc,sizeof *desc,(size_t) desclen,fpout);
  gt_xfputc('\n',fpout);

  /* sequence */
  seqstartpos = gt_encseq_seqstartpos(encseq, seqnum);
  seqlength = gt_encseq_seqlength(encseq, seqnum);
  if (fastakeyquery != NULL && !COMPLETE(fastakeyquery))
  {
    offset = fastakeyquery->frompos - 1;
    length = fastakeyquery->topos - fastakeyquery->frompos + 1;
  } else
  {
    offset = 0;
    length = seqlength;
  }
  gt_encseq2symbolstring(fpout,
                         encseq,
                         GT_READMODE_FORWARD,
                         seqstartpos + offset,
                         length,
                         linewidth);
  return 0;
}
/* Write the <num_of_md5s> strings in <md5_fingerprints> to <outfp>, each
   followed by a '\0' byte. All arguments must be non-NULL / non-zero. */
static void dump_md5_fingerprints(char **md5_fingerprints,
                                  GtUword num_of_md5s,
                                  FILE *outfp)
{
  char **cur, **stop;

  gt_assert(md5_fingerprints && num_of_md5s && outfp);
  stop = md5_fingerprints + num_of_md5s;
  for (cur = md5_fingerprints; cur != stop; cur++) {
    gt_xfputs(*cur, outfp);
    gt_xfputc('\0', outfp); /* terminator separates the entries */
  }
}
/* Write the <len> encoded symbols in <src> to <fpout>, mapping every symbol
   to a character via the character table of <alphabet>. If <alphabet> is
   NULL, the table "acgt" is used instead. <fpout> must not be NULL and
   <src> may only be NULL if <len> is 0. */
void gt_alphabet_decode_seq_to_fp(const GtAlphabet *alphabet,
                                  FILE *fpout,
                                  const GtUchar *src,
                                  unsigned long len)
{
  const GtUchar *characters, *cur;
  unsigned long remaining;

  gt_assert(fpout != NULL && (len == 0 || src != NULL));
  characters = (alphabet != NULL) ? alphabet->characters
                                  : (const GtUchar *) "acgt";
  for (cur = src, remaining = len; remaining > 0; cur++, remaining--)
  {
    gt_xfputc((int) characters[(int) *cur],fpout);
  }
}
/* Decode <encseq> to stdout in the mode selected by <args>->mode.

   "fasta":  writes one record per sequence (GT_FASTA_SEPARATOR, description,
             newline, decoded characters, newline). Either the single
             sequence <args>->seq, the sequence range <args>->seqrng (both
             ends inclusive) or, if neither is defined, all sequences are
             written. If <encseq> has no description support, the
             description "sequence <i>" is used.
   "concat": writes the decoded characters of positions <args>->rng (both
             ends inclusive), or of the whole encoded sequence if no range
             is defined, on a single line. SEPARATOR characters are replaced
             by the first character of <args>->sepchar.

   Characters are fetched one by one if <args>->singlechars is set and via a
   GtEncseqReader otherwise. <filename> is only used in the warning about
   missing description support.

   Returns 0 on success and -1 with <err> set if a requested sequence
   number, sequence range or position range end lies outside <encseq>. */
static int output_sequence(GtEncseq *encseq, GtEncseqDecodeArguments *args,
                           const char *filename, GtError *err)
{
  GtUword i, j, sfrom, sto;
  int had_err = 0;
  bool has_desc;
  GtEncseqReader *esr;

  gt_assert(encseq);
  has_desc = gt_encseq_has_description_support(encseq);
  if (!has_desc)
    gt_warning("Missing description support for file %s", filename);

  if (strcmp(gt_str_get(args->mode), "fasta") == 0) {
    const GtUword numofseqs = gt_encseq_num_of_sequences(encseq);

    if (args->seq != GT_UNDEF_UWORD) {
      /* specify a single sequence to extract */
      if (args->seq >= numofseqs) {
        gt_error_set(err,
                     "requested sequence "GT_WU" exceeds number of sequences "
                     "("GT_WU")", args->seq, numofseqs);
        return -1;
      }
      sfrom = args->seq;
      sto = args->seq + 1;
    } else if (args->seqrng.start != GT_UNDEF_UWORD
                 && args->seqrng.end != GT_UNDEF_UWORD) {
      /* specify a sequence range to extract */
      if (args->seqrng.start >= numofseqs || args->seqrng.end >= numofseqs) {
        gt_error_set(err,
                     "range "GT_WU"-"GT_WU" includes a sequence number "
                     "exceeding the total number of sequences ("GT_WU")",
                     args->seqrng.start, args->seqrng.end, numofseqs);
        return -1;
      }
      sfrom = args->seqrng.start;
      sto = args->seqrng.end + 1;
    } else {
      /* extract all sequences */
      sfrom = 0;
      sto = numofseqs;
    }

    for (i = sfrom; i < sto; i++) {
      GtUword desclen, startpos, len, descseqnum;
      char buf[BUFSIZ];
      const char *desc = NULL;

      /* XXX: maybe make this distinction in the functions via readmode? */
      if (!GT_ISDIRREVERSE(args->rm)) {
        startpos = gt_encseq_seqstartpos(encseq, i);
        len = gt_encseq_seqlength(encseq, i);
        descseqnum = i;
      } else {
        /* in reverse read modes, record i is taken from the sequence
           counted from the end, and its start position is measured from the
           end of the encoded sequence */
        const GtUword mirrored = numofseqs - 1 - i;
        len = gt_encseq_seqlength(encseq, mirrored);
        startpos = gt_encseq_total_length(encseq)
                     - (gt_encseq_seqstartpos(encseq, mirrored) + len);
        descseqnum = mirrored;
      }
      if (has_desc) {
        desc = gt_encseq_description(encseq, &desclen, descseqnum);
      } else {
        /* the fallback description always uses the loop index */
        (void) snprintf(buf, BUFSIZ, "sequence "GT_WU"", i);
        desclen = strlen(buf);
        desc = buf;
      }
      gt_assert(desc);

      /* output description */
      gt_xfputc(GT_FASTA_SEPARATOR, stdout);
      gt_xfwrite(desc, 1, desclen, stdout);
      gt_xfputc('\n', stdout);

      /* XXX: make this more efficient by writing in a buffer first and then
         showing the result */
      if (args->singlechars) {
        for (j = 0; j < len; j++) {
          gt_xfputc(gt_encseq_get_decoded_char(encseq, startpos + j,
                                               args->rm),
                    stdout);
        }
      } else {
        esr = gt_encseq_create_reader_with_readmode(encseq, args->rm,
                                                    startpos);
        for (j = 0; j < len; j++) {
          gt_xfputc(gt_encseq_reader_next_decoded_char(esr), stdout);
        }
        gt_encseq_reader_delete(esr);
      }
      gt_xfputc('\n', stdout);
    }
  }

  if (strcmp(gt_str_get(args->mode), "concat") == 0) {
    GtUword from = 0,
            to = gt_encseq_total_length(encseq) - 1;

    if (args->rng.start != GT_UNDEF_UWORD && args->rng.end != GT_UNDEF_UWORD) {
      if (args->rng.end > to) {
        had_err = -1;
        gt_error_set(err,
                     "end of range ("GT_WU") exceeds encoded sequence length "
                     "("GT_WU")", args->rng.end, to);
      }
      if (!had_err) {
        from = args->rng.start;
        to = args->rng.end;
      }
    }
    if (!had_err) {
      if (args->singlechars) {
        for (j = from; j <= to; j++) {
          char cc = gt_encseq_get_decoded_char(encseq, j, args->rm);
          if (cc == (char) SEPARATOR)
            cc = gt_str_get(args->sepchar)[0];
          gt_xfputc(cc, stdout);
        }
      } else {
        esr = gt_encseq_create_reader_with_readmode(encseq, args->rm, from);
        if (esr) {
          for (j = from; j <= to; j++) {
            char cc = gt_encseq_reader_next_decoded_char(esr);
            if (cc == (char) SEPARATOR)
              cc = gt_str_get(args->sepchar)[0];
            gt_xfputc(cc, stdout);
          }
          gt_encseq_reader_delete(esr);
        }
      }
      gt_xfputc('\n', stdout);
    }
  }
  return had_err;
}
/* Print <alignment> to <fp> as three lines. The multi edit operations are
   processed from the last list entry to the first.

   Line 1: the symbols of u, with GAPSYMBOL for every inserted position.
   Line 2: MATCHSYMBOL where the aligned symbols of u and v are equal and
           not special, MISMATCHSYMBOL everywhere else (including
           deletions and insertions).
   Line 3: the symbols of v, with GAPSYMBOL for every deleted position.

   Symbols are shown as characters[symbol]; special symbols are shown as
   <wildcardshow>. <alignment> must be non-NULL and valid. */
void gt_alignment_show_with_mapped_chars(const GtAlignment *alignment,
                                         const GtUchar *characters,
                                         GtUchar wildcardshow,
                                         FILE *fp)
{
  GtUword eopidx, step, upos, vpos, numofeops;
  GtMultieop *meop;

  gt_assert(alignment);
  gt_assert(gt_alignment_is_valid(alignment));
  numofeops = gt_multieoplist_get_length(alignment->eops);

  /* first line: sequence u */
  upos = 0;
  for (eopidx = numofeops; eopidx-- > 0; /* Nothing */) {
    meop = gt_multieoplist_get_entry(alignment->eops, eopidx);
    switch (meop->type) {
      case Insertion:
        for (step = 0; step < meop->steps; step++)
          gt_xfputc(GAPSYMBOL, fp);
        break;
      case Mismatch:
      case Match:
      case Replacement:
      case Deletion:
        for (step = 0; step < meop->steps; step++, upos++) {
          gt_xfputc(ISSPECIAL(alignment->u[upos])
                      ? (int) wildcardshow
                      : (int) characters[alignment->u[upos]],
                    fp);
        }
        break;
    }
  }
  gt_xfputc('\n', fp);

  /* middle line: match/mismatch markers */
  upos = 0;
  vpos = 0;
  for (eopidx = numofeops; eopidx-- > 0; /* Nothing */) {
    meop = gt_multieoplist_get_entry(alignment->eops, eopidx);
    switch (meop->type) {
      case Deletion:
        for (step = 0; step < meop->steps; step++, upos++)
          gt_xfputc(MISMATCHSYMBOL, fp);
        break;
      case Insertion:
        for (step = 0; step < meop->steps; step++, vpos++)
          gt_xfputc(MISMATCHSYMBOL, fp);
        break;
      case Mismatch:
      case Match:
      case Replacement:
        for (step = 0; step < meop->steps; step++, upos++, vpos++) {
          if (alignment->u[upos] == alignment->v[vpos] &&
              ISNOTSPECIAL(alignment->u[upos]))
            gt_xfputc(MATCHSYMBOL, fp);
          else
            gt_xfputc(MISMATCHSYMBOL, fp);
        }
        break;
    }
  }
  gt_xfputc('\n', fp);

  /* last line: sequence v */
  vpos = 0;
  for (eopidx = numofeops; eopidx-- > 0; /* Nothing */) {
    meop = gt_multieoplist_get_entry(alignment->eops, eopidx);
    switch (meop->type) {
      case Deletion:
        for (step = 0; step < meop->steps; step++)
          gt_xfputc(GAPSYMBOL, fp);
        break;
      case Mismatch:
      case Match:
      case Replacement:
      case Insertion:
        for (step = 0; step < meop->steps; step++, vpos++) {
          gt_xfputc(ISSPECIAL(alignment->v[vpos])
                      ? (int) wildcardshow
                      : (int) characters[alignment->v[vpos]],
                    fp);
        }
        break;
    }
  }
  gt_xfputc('\n', fp);
}
/* XXX: add width parameter and format the GtAlignment accordingly */ void gt_alignment_show(const GtAlignment *alignment, FILE *fp) { GtUword i, j, idx_u, idx_v, meoplen; GtMultieop *meop; gt_assert(alignment); gt_assert(gt_alignment_is_valid(alignment)); meoplen = gt_multieoplist_get_length(alignment->eops); /* output first line */ idx_u = 0; for (i = meoplen; i > 0; i--) { meop = gt_multieoplist_get_entry(alignment->eops, i - 1); switch (meop->type) { case Mismatch: case Match: case Replacement: case Deletion: for (j = 0; j < meop->steps; j++) gt_xfputc((int) alignment->u[idx_u++], fp); break; case Insertion: for (j = 0; j < meop->steps; j++) gt_xfputc(GAPSYMBOL, fp); break; } } gt_xfputc('\n', fp); /* output middle line */ idx_u = idx_v = 0; for (i = meoplen; i > 0; i--) { meop = gt_multieoplist_get_entry(alignment->eops, i - 1); switch (meop->type) { case Mismatch: case Match: case Replacement: for (j = 0; j < meop->steps; j++) { if (tolower((int) alignment->u[idx_u++]) == tolower((int) alignment->v[idx_v++])) gt_xfputc(MATCHSYMBOL, fp); else gt_xfputc(MISMATCHSYMBOL, fp); } break; case Deletion: for (j = 0; j < meop->steps; j++) { gt_xfputc(MISMATCHSYMBOL, fp); idx_u++; } break; case Insertion: for (j = 0; j < meop->steps; j++) { gt_xfputc(MISMATCHSYMBOL, fp); idx_v++; } break; } } gt_xfputc('\n', fp); /* ouput last line */ idx_v = 0; for (i = meoplen; i > 0; i--) { meop = gt_multieoplist_get_entry(alignment->eops, i - 1); switch (meop->type) { case Mismatch: case Match: case Replacement: case Insertion: for (j = 0; j < meop->steps; j++) gt_xfputc((int) alignment->v[idx_v++], fp); break; case Deletion: for (j = 0; j < meop->steps; j++) gt_xfputc(GAPSYMBOL, fp); break; } } gt_xfputc('\n', fp); }
/* Write to <fpout> the result of converttoprettysymbol() for <currentchar>
   with respect to <alphabet>. */
void gt_alphabet_echo_pretty_symbol(const GtAlphabet *alphabet,
                                    FILE *fpout,
                                    GtUchar currentchar)
{
  const int pretty = (int) converttoprettysymbol(alphabet, currentchar);

  gt_xfputc(pretty, fpout);
}
/* Extract the keys from the description table <indexname> +
   GT_DESTABFILESUFFIX, reading it line by line and obtaining the key of
   each line with desc2key().

   All keys must be non-empty and of the same length; the length of the
   first key must not exceed CHAR_MAX.

   If <sortkeys> is false, the keys are written to <indexname> +
   GT_KEYSTABFILESUFFIX: one byte holding the key length, followed by every
   key terminated with '\0'. In this mode each key must be lexicographically
   larger than its predecessor.

   If <sortkeys> is true, the encoded sequence <indexname> is loaded, the
   keys (which then must not be longer than MAXFIXEDKEYSIZE) are collected
   together with their line number, sorted with compareFixedkeys(), and the
   corresponding sequences are written to stdout in that order with a line
   width of 60.

   Returns 0 on success and -1 on error, with <err> set by this function or
   by the failing callee. */
int gt_extractkeysfromdesfile(const char *indexname,
                              bool sortkeys,
                              GtLogger *logger,
                              GtError *err)
{
  FILE *fpin, *fpout = NULL;
  GtStr *line = NULL;
  const char *keyptr;
  unsigned long keylen, constantkeylen = 0, linenum;
  bool haserr = false, firstdesc = true;
  char *previouskey = NULL;
  Fixedsizekey *keytab = NULL, *keytabptr = NULL;
  GtEncseq *encseq = NULL;
  unsigned long numofentries = 0;
  const unsigned long linewidth = 60UL;

  fpin = gt_fa_fopen_with_suffix(indexname,GT_DESTABFILESUFFIX,"rb",err);
  if (fpin == NULL)
  {
    return -1;
  }
  if (!sortkeys)
  {
    fpout = gt_fa_fopen_with_suffix(indexname,GT_KEYSTABFILESUFFIX,"wb",err);
    if (fpout == NULL)
    {
      haserr = true;
    }
  }
  if (!haserr)
  {
    line = gt_str_new();
  }
  for (linenum = 0;
       !haserr && gt_str_read_next_line(line, fpin) != EOF;
       linenum++)
  {
    keyptr = desc2key(&keylen,gt_str_get(line),err);
    if (keyptr == NULL)
    {
      haserr = true;
      break;
    }
    if (keylen == 0)
    {
      gt_error_set(err,"key of length 0 in \"%s\" not expected",
                   gt_str_get(line));
      haserr = true;
      break;
    }
    if (firstdesc)
    {
      /* the first key determines the length all keys must have */
      if (keylen > (unsigned long) CHAR_MAX)
      {
        gt_error_set(err,"key \"%*.*s\" of length %lu not allowed; "
                         "no key must be larger than %d",
                     (int) keylen,(int) keylen,keyptr,keylen,CHAR_MAX);
        haserr = true;
        break;
      }
      constantkeylen = keylen;
      previouskey = gt_malloc(sizeof (char) * (constantkeylen+1));
      firstdesc = false;
      if (!sortkeys)
      {
        gt_xfputc((char) constantkeylen,fpout);
      } else
      {
        GtEncseqLoader *el;

        if (constantkeylen > (unsigned long) MAXFIXEDKEYSIZE)
        {
          gt_error_set(err,"key \"%*.*s\" of length %lu not allowed; "
                           "no key must be larger than %d",
                       (int) keylen,(int) keylen,keyptr,keylen,
                       MAXFIXEDKEYSIZE);
          haserr = true;
          break;
        }
        el = gt_encseq_loader_new();
        gt_encseq_loader_set_logger(el, logger);
        encseq = gt_encseq_loader_load(el, indexname, err);
        gt_encseq_loader_delete(el);
        if (encseq == NULL)
        {
          haserr = true;
          break;
        }
        numofentries = gt_encseq_num_of_sequences(encseq);
        gt_assert(numofentries > 0);
        keytab = gt_malloc(sizeof (*keytab) * numofentries);
        keytabptr = keytab;
      }
    } else
    {
      if (constantkeylen != keylen)
      {
        gt_error_set(err,"key \"%*.*s\" of length %lu: all keys must be of "
                         "the same length which for all previously seen "
                         "headers is %lu",
                     (int) keylen,(int) keylen,keyptr,keylen,
                     constantkeylen);
        haserr = true;
        break;
      }
      gt_assert(previouskey != NULL);
      if (!sortkeys &&
          strncmp(previouskey,keyptr,(size_t) constantkeylen) >= 0)
      {
        gt_error_set(err,"previous key \"%s\" is not lexicographically smaller "
                         "than current key \"%*.*s\"",
                     previouskey,(int) keylen,(int) keylen,keyptr);
        haserr = true;
        break;
      }
    }
    if (!sortkeys)
    {
      gt_xfwrite(keyptr,sizeof *keyptr,(size_t) keylen,fpout);
      gt_xfputc('\0',fpout);
    } else
    {
      gt_assert(keytabptr != NULL);
      /* keytab has exactly numofentries slots; refuse to write past its end
         if the description table has more lines than there are sequences */
      if (keytabptr >= keytab + numofentries)
      {
        gt_error_set(err,"number of keys exceeds number of sequences (%lu)",
                     numofentries);
        haserr = true;
        break;
      }
      strncpy(keytabptr->key,keyptr,(size_t) constantkeylen);
      keytabptr->key[constantkeylen] = '\0';
      keytabptr->seqnum = linenum;
      keytabptr++;
    }
    /* remember the key for the order check of the next line */
    strncpy(previouskey,keyptr,(size_t) constantkeylen);
    previouskey[constantkeylen] = '\0';
    gt_str_reset(line);
  }
  if (!haserr)
  {
    gt_logger_log(logger,"number of keys of length %lu = %lu",
                  constantkeylen,linenum);
  }
  gt_str_delete(line);
  gt_fa_fclose(fpin);
  gt_fa_fclose(fpout);
  gt_free(previouskey);
  if (!haserr && sortkeys)
  {
    gt_assert(keytabptr != NULL);
    gt_assert(numofentries > 0);
    gt_assert(keytabptr == keytab + numofentries);
    qsort(keytab,(size_t) numofentries,sizeof (*keytab),compareFixedkeys);
    gt_assert(keytabptr != NULL);
    /* output the sequences in the order of their sorted keys */
    for (keytabptr = keytab;
         !haserr && keytabptr < keytab + numofentries;
         keytabptr++)
    {
      if (giextract_encodedseq2fasta(stdout,
                                     encseq,
                                     keytabptr->seqnum,
                                     NULL,
                                     linewidth,
                                     err) != 0)
      {
        haserr = true;
        break;
      }
    }
  }
  if (encseq != NULL)
  {
    gt_encseq_delete(encseq);
    encseq = NULL;
  }
  gt_free(keytab);
  return haserr ? -1 : 0;
}