コード例 #1
0
ファイル: gt_seqfilter.c プロジェクト: 9beckert/TIR
/*
 * Tool runner of the sequence filter.
 *
 * Iterates over all sequences of the files given in argv[parsed_args..argc-1],
 * writes every sequence which satisfies the configured minimum length, maximum
 * length and maximum number of sequences as FASTA entry to arguments->outfp
 * and finally reports on stderr how many sequences have been removed.
 *
 * Returns 0 on success and the error code of the bioseq iterator otherwise
 * (in which case no statistics are shown).
 */
static int gt_seqfilter_runner(int argc, const char **argv, int parsed_args,
                               void *tool_arguments, GtError *err)
{
  SeqFilterArguments *arguments = tool_arguments;
  GtBioseqIterator *bsi;
  GtBioseq *bioseq;
  unsigned long i;
  unsigned long long passed = 0, filtered = 0, num_of_sequences = 0;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(tool_arguments);

  bsi = gt_bioseq_iterator_new(argc - parsed_args, argv + parsed_args);

  while (!(had_err = gt_bioseq_iterator_next(bsi, &bioseq, err)) && bioseq) {
    for (i = 0; i < gt_bioseq_number_of_sequences(bioseq); i++) {
      /* the length is needed for both limits and for the output */
      unsigned long seqlen = gt_bioseq_get_sequence_length(bioseq, i);
      if ((arguments->minlength == GT_UNDEF_ULONG ||
           seqlen >= arguments->minlength) &&
          (arguments->maxlength == GT_UNDEF_ULONG ||
           seqlen <= arguments->maxlength) &&
          (arguments->maxseqnum == GT_UNDEF_ULONG ||
           passed + 1 <= arguments->maxseqnum)) {
        gt_fasta_show_entry(gt_bioseq_get_description(bioseq, i),
                            gt_bioseq_get_sequence(bioseq, i),
                            seqlen,
                            arguments->width, arguments->outfp);
        passed++;
      }
      else
        filtered++;
      num_of_sequences++;
    }
    gt_bioseq_delete(bioseq);
  }

  /* show statistics */
  if (!had_err) {
    /* without any input sequence the ratio would be 0/0 and "nan" would be
       printed, report 0% instead */
    double removed_percent = num_of_sequences
                             ? ((double) filtered / num_of_sequences) * 100.0
                             : 0.0;
    gt_assert(passed + filtered == num_of_sequences);
    fprintf(stderr, "# %llu out of %llu sequences have been removed (%.3f%%)\n",
            filtered, num_of_sequences, removed_percent);
  }

  gt_bioseq_iterator_delete(bsi);

  return had_err;
}
コード例 #2
0
/*
 * Looks up the sequence whose description matches <seqid> (via grep_desc())
 * and stores the range <start>..<end> of that sequence in <*seq>.
 *
 * Returns 0 on success. If the lookup fails, its error code is returned; if
 * <start> or <end> lie outside of the sequence, -1 is returned and <err> is
 * set. <*seq> is only written on success.
 */
static int gt_bioseq_col_grep_desc(GtSeqCol *sc, char **seq,
                                   GtUword start, GtUword end,
                                   GtStr *seqid, GtError *err)
{
  GtUword filenum = 0, seqnum = 0, seqlength;
  int had_err;
  GtBioseqCol *bsc;
  bsc = gt_bioseq_col_cast(sc);
  gt_error_check(err);
  gt_assert(bsc && seq && seqid);
  had_err = grep_desc(bsc, &filenum, &seqnum, seqid, err);
  if (!had_err) {
    seqlength = gt_bioseq_get_sequence_length(bsc->bioseqs[filenum], seqnum);
    /* compare against seqlength itself: "seqlength - 1" would wrap around for
       an empty sequence and let every range pass the check */
    if (start >= seqlength || end >= seqlength) {
      had_err = -1;
      gt_error_set(err, "trying to extract range "GT_WU"-"GT_WU" on sequence "
                         "``%s'' which is not covered by that sequence (only "
                         ""GT_WU" characters in size). Has the sequence-region "
                         "to sequence mapping been defined correctly?",
                         start, end, gt_str_get(seqid), seqlength);
    }
  }
  if (!had_err) {
    *seq = gt_bioseq_get_sequence_range(bsc->bioseqs[filenum], seqnum,
                                        start, end);
  }
  return had_err;
}
コード例 #3
0
/*
 * Writes every sequence of <bs> whose description matches <pattern> (as
 * decided by gt_grep()) to <outfp> as FASTA entry with line width <width>.
 * Stops at the first gt_grep() failure and returns its error code, 0 otherwise.
 */
static int extractseq_match(GtFile *outfp, GtBioseq *bs,
                            const char *pattern, unsigned long width,
                            GtError *err)
{
  unsigned long seqnum = 0;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(bs && pattern);

  while (!had_err && seqnum < gt_bioseq_number_of_sequences(bs)) {
    bool matched;
    const char *description = gt_bioseq_get_description(bs, seqnum);
    gt_assert(description);
    had_err = gt_grep(&matched, pattern, description, err);
    /* <matched> is only meaningful if gt_grep() succeeded */
    if (!had_err && matched) {
      gt_fasta_show_entry_generic(description,
                                  gt_bioseq_get_sequence(bs, seqnum),
                                  gt_bioseq_get_sequence_length(bs, seqnum),
                                  width, outfp);
    }
    seqnum++;
  }

  return had_err;
}
コード例 #4
0
ファイル: gt_seqmutate.c プロジェクト: 9beckert/TIR
/*
 * Tool runner of the sequence mutator.
 *
 * For every sequence of the files given in argv[parsed_args..argc-1] a mutated
 * copy is created with gt_mutate_seq() (using arguments->rate) and written as
 * FASTA entry to arguments->outfp. Returns the error code of the bioseq
 * iterator (0 on success).
 */
static int gt_seqmutate_runner(int argc, const char **argv, int parsed_args,
                            void *tool_arguments, GtError *err)
{
  MutateArguments *arguments = tool_arguments;
  GtBioseqIterator *bsi;
  GtBioseq *bioseq;
  int had_err;

  gt_error_check(err);
  gt_assert(arguments);

  bsi = gt_bioseq_iterator_new(argc - parsed_args, argv + parsed_args);

  for (;;) {
    unsigned long seqnum;

    had_err = gt_bioseq_iterator_next(bsi, &bioseq, err);
    if (had_err || !bioseq)
      break; /* error or no further sequence file */

    for (seqnum = 0; seqnum < gt_bioseq_number_of_sequences(bioseq);
         seqnum++) {
      GtSeq *mutated_seq;
      mutated_seq = gt_mutate_seq(gt_bioseq_get_description(bioseq, seqnum),
                                  gt_bioseq_get_sequence(bioseq, seqnum),
                                  gt_bioseq_get_sequence_length(bioseq, seqnum),
                                  gt_bioseq_get_alphabet(bioseq),
                                  arguments->rate);
      gt_fasta_show_entry(gt_seq_get_description(mutated_seq),
                          gt_seq_get_orig(mutated_seq),
                          gt_seq_length(mutated_seq),
                          arguments->width, arguments->outfp);
      gt_seq_delete(mutated_seq);
    }
    gt_bioseq_delete(bioseq);
  }

  gt_bioseq_iterator_delete(bsi);

  return had_err;
}
コード例 #5
0
/* Returns the length of sequence <seqnum> in sequence file <filenum> of the
   bioseq collection <sc>. <filenum> must be a valid file index. */
static GtUword gt_bioseq_col_get_sequence_length(const GtSeqCol *sc,
                                                       GtUword filenum,
                                                       GtUword seqnum)
{
  GtBioseqCol *bsc = gt_bioseq_col_cast(sc);
  GtUword length;

  gt_assert(bsc && filenum < bsc->num_of_seqfiles);
  length = gt_bioseq_get_sequence_length(bsc->bioseqs[filenum], seqnum);
  return length;
}
コード例 #6
0
/* Callback which shows the sequence described by the TargetInfo in <data>
   as FASTA entry; the position argument is not used. Always returns true. */
static bool show_target(GT_UNUSED unsigned long pos, void *data)
{
  TargetInfo *ti = data;
  const char *description, *sequence;

  gt_assert(ti);
  description = gt_bioseq_get_description(ti->bioseq, ti->seqnum);
  sequence = gt_bioseq_get_sequence(ti->bioseq, ti->seqnum);
  gt_fasta_show_entry(description, sequence,
                      gt_bioseq_get_sequence_length(ti->bioseq, ti->seqnum), 0);
  return true;
}
コード例 #7
0
ファイル: bioseq.c プロジェクト: ggonnella/genometools
/* Creates a new GtSeq from sequence number <idx> of <bs>. The sequence string
   obtained from <bs> is handed over to gt_seq_new_own(), the description is
   set from the description stored in <bs>. */
GtSeq* gt_bioseq_get_seq(GtBioseq *bs, GtUword idx)
{
  GtSeq *result;
  char *sequence;
  GtUword length;

  gt_assert(bs);
  gt_assert(idx < gt_encseq_num_of_sequences(bs->encseq));

  sequence = gt_bioseq_get_sequence(bs, idx);
  length = gt_bioseq_get_sequence_length(bs, idx);
  result = gt_seq_new_own(sequence, length, gt_encseq_alphabet(bs->encseq));
  gt_seq_set_description(result, gt_bioseq_get_description(bs, idx));
  return result;
}
コード例 #8
0
ファイル: bioseq.c プロジェクト: ggonnella/genometools
/* Collects the lengths of all sequences in <bs> in a discrete distribution
   and shows it (preceded by a header line) on <outfp>. */
void gt_bioseq_show_seqlengthdistri(GtBioseq *bs, GtFile *outfp)
{
  GtDiscDistri *lengths;
  GtUword seqnum = 0;

  gt_assert(bs);

  lengths = gt_disc_distri_new();
  while (seqnum < gt_bioseq_number_of_sequences(bs)) {
    gt_disc_distri_add(lengths, gt_bioseq_get_sequence_length(bs, seqnum));
    seqnum++;
  }

  gt_file_xprintf(outfp, "sequence length distribution:\n");
  gt_disc_distri_show(lengths, outfp);
  gt_disc_distri_delete(lengths);
}
コード例 #9
0
ファイル: bioseq.c プロジェクト: ggonnella/genometools
/* Shows all sequences of <bs> as FASTA entries with line width <width> on
   <outfp>. */
void gt_bioseq_show_as_fasta(GtBioseq *bs, GtUword width, GtFile *outfp)
{
  GtUword seqnum = 0;

  gt_assert(bs);

  while (seqnum < gt_bioseq_number_of_sequences(bs)) {
    /* the sequence string is released with gt_free() after it was shown */
    char *residues = gt_bioseq_get_sequence(bs, seqnum);
    const char *description = gt_bioseq_get_description(bs, seqnum);
    GtUword length = gt_bioseq_get_sequence_length(bs, seqnum);

    gt_fasta_show_entry(description, residues, length, width, outfp);
    gt_free(residues);
    seqnum++;
  }
}
コード例 #10
0
ファイル: bioseq.c プロジェクト: ggonnella/genometools
/* Shows sequence number <seqnum> of <bs> as FASTA entry with line width
   <width> on <outfp>. <seqnum> must be a valid sequence number. */
void gt_bioseq_show_sequence_as_fasta(GtBioseq *bs, GtUword seqnum,
                                      GtUword width, GtFile *outfp)
{
  char *residues;
  const char *description;
  GtUword length;

  gt_assert(bs);
  gt_assert(seqnum < gt_bioseq_number_of_sequences(bs));

  residues = gt_bioseq_get_sequence(bs, seqnum);
  description = gt_bioseq_get_description(bs, seqnum);
  length = gt_bioseq_get_sequence_length(bs, seqnum);

  gt_fasta_show_entry(description, residues, length, width, outfp);

  /* the sequence string is released here after it was shown */
  gt_free(residues);
}
コード例 #11
0
/* Looks up the sequence whose description matches <seqid> (via grep_desc())
   and stores its length in <*length>. Returns the error code of the lookup;
   <*length> is only written if the lookup succeeded. */
static int gt_bioseq_col_grep_desc_sequence_length(GtSeqCol *sc,
                                                   GtUword *length,
                                                   GtStr *seqid,
                                                   GtError *err)
{
  GtBioseqCol *bsc = gt_bioseq_col_cast(sc);
  GtUword filenum = 0, seqnum = 0;
  int had_err;

  gt_error_check(err);
  gt_assert(bsc && length && seqid);

  had_err = grep_desc(bsc, &filenum, &seqnum, seqid, err);
  if (had_err)
    return had_err;

  *length = gt_bioseq_get_sequence_length(bsc->bioseqs[filenum], seqnum);
  return 0;
}
コード例 #12
0
ファイル: bioseq.c プロジェクト: ggonnella/genometools
/* Shows statistics for <bs> on <outfp>: the name of the sequence file, the
   number of sequences, the total length and the length of each sequence
   (numbered starting with 1). */
void gt_bioseq_show_stat(GtBioseq *bs, GtFile *outfp)
{
  GtUword seqnum, num_of_seqs, total_length;

  gt_assert(bs);

  num_of_seqs = gt_bioseq_number_of_sequences(bs);
  /* total length of the encoded sequence minus the number of sequences
     plus one */
  total_length = gt_encseq_total_length(bs->encseq)
                   - gt_encseq_num_of_sequences(bs->encseq) + 1;

  gt_file_xprintf(outfp, "showing statistics for sequence file \"%s\"\n",
                  gt_str_get(bs->sequence_file));
  gt_file_xprintf(outfp, "number of sequences: "GT_WU"\n", num_of_seqs);
  gt_file_xprintf(outfp, "total length: "GT_WU"\n", total_length);

  for (seqnum = 0; seqnum < num_of_seqs; seqnum++) {
    GtUword length = gt_bioseq_get_sequence_length(bs, seqnum);
    gt_file_xprintf(outfp, "sequence #"GT_WU" length: "GT_WU"\n", seqnum + 1,
                    length);
  }
}
コード例 #13
0
/* Maps <md5_seqid> (which must carry the MD5 sequence ID prefix) to a
   sequence of the bioseq collection <sc> via md5_to_index() and stores the
   length of that sequence in <*len>. Returns the error code of the mapping;
   <*len> is only written if the mapping succeeded. */
int gt_bioseq_col_md5_to_sequence_length(GtSeqCol *sc, GtUword *len,
                                         GtStr *md5_seqid, GtError *err)
{
  GtBioseqCol *bsc = gt_bioseq_col_cast(sc);
  GtBioseq *bioseq = NULL;
  GtUword seqnum = GT_UNDEF_UWORD;
  int had_err;

  gt_error_check(err);
  gt_assert(bsc && len && md5_seqid && err);
  gt_assert(gt_md5_seqid_has_prefix(gt_str_get(md5_seqid)));

  had_err = md5_to_index(&bioseq, &seqnum, bsc, md5_seqid, err);
  if (had_err)
    return had_err;

  /* a successful mapping must have set the sequence number */
  gt_assert(seqnum != GT_UNDEF_UWORD);
  *len = gt_bioseq_get_sequence_length(bioseq, seqnum);
  return 0;
}
コード例 #14
0
/*
 * Tool runner of the sequence transformer.
 *
 * Writes every sequence of the files given in argv[parsed_args..argc-1] as
 * FASTA entry to arguments->outfp. If arguments->addstopaminos is set, the
 * alphabet of the file is a protein alphabet and a non-empty sequence does not
 * end with GT_STOP_AMINO, GT_STOP_AMINO_CSTR is passed as suffix of the entry.
 * Returns the error code of the bioseq iterator (0 on success).
 */
static int gt_seqtransform_runner(int argc, const char **argv, int parsed_args,
                            void *tool_arguments, GtError *err)
{
  SeqtransformArguments *arguments = tool_arguments;
  GtBioseqIterator *bsi;
  GtBioseq *bioseq;
  int had_err;

  gt_error_check(err);
  gt_assert(arguments);

  bsi = gt_bioseq_iterator_new(argc - parsed_args, argv + parsed_args);

  for (;;) {
    unsigned long seqnum;
    bool is_protein;

    had_err = gt_bioseq_iterator_next(bsi, &bioseq, err);
    if (had_err || !bioseq)
      break; /* error or no further sequence file */

    is_protein = gt_alphabet_is_protein(gt_bioseq_get_alphabet(bioseq));

    for (seqnum = 0; seqnum < gt_bioseq_number_of_sequences(bioseq);
         seqnum++) {
      const char *desc = gt_bioseq_get_description(bioseq, seqnum);
      char *seq = gt_bioseq_get_sequence(bioseq, seqnum);
      unsigned long seqlen = gt_bioseq_get_sequence_length(bioseq, seqnum);
      /* the last character is only inspected for non-empty sequences */
      bool needs_stop = arguments->addstopaminos && is_protein && seqlen &&
                        seq[seqlen-1] != GT_STOP_AMINO;
      const char *suffix = needs_stop ? GT_STOP_AMINO_CSTR : NULL;

      gt_fasta_show_entry_with_suffix(desc, seq, seqlen, suffix,
                                      arguments->width, arguments->outfp);
      gt_free(seq);
    }
    gt_bioseq_delete(bioseq);
  }

  gt_bioseq_iterator_delete(bsi);

  return had_err;
}
コード例 #15
0
/*
 * Writes every sequence of the sequence file <filename> into a file of its
 * own. The name of each output file is composed of <splitdesc>, a '/', the
 * description of the sequence and the suffix of <filename>; the file is opened
 * with gt_output_file_xopen_forcecheck() (passing <force>) and receives the
 * sequence as FASTA entry with line width <width>.
 *
 * Returns 0 on success and -1 if the sequence file could not be read or an
 * output file could not be opened.
 */
static int split_description(const char *filename, GtStr *splitdesc,
                             unsigned long width, bool force, GtError *err)
{
  unsigned long seqnum = 0;
  GtBioseq *bioseq;
  GtStr *descname;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(filename && splitdesc && gt_str_length(splitdesc));

  descname = gt_str_new();
  bioseq = gt_bioseq_new(filename, err);
  if (!bioseq)
    had_err = -1;

  while (!had_err && seqnum < gt_bioseq_number_of_sequences(bioseq)) {
    GtFile *outfp;

    /* compose "<splitdesc>/<description><suffix of filename>" */
    gt_str_reset(descname);
    gt_str_append_str(descname, splitdesc);
    gt_str_append_char(descname, '/');
    gt_str_append_cstr(descname, gt_bioseq_get_description(bioseq, seqnum));
    gt_str_append_cstr(descname, gt_file_suffix(filename));

    outfp = gt_output_file_xopen_forcecheck(gt_str_get(descname), "w",
                                            force, err);
    if (!outfp) {
      /* ends the loop via its condition */
      had_err = -1;
    }
    else {
      char *seq = gt_bioseq_get_sequence(bioseq, seqnum);
      gt_fasta_show_entry(gt_bioseq_get_description(bioseq, seqnum), seq,
                          gt_bioseq_get_sequence_length(bioseq, seqnum), width,
                          outfp);
      gt_free(seq);
      gt_file_delete(outfp);
      seqnum++;
    }
  }

  gt_bioseq_delete(bioseq);
  gt_str_delete(descname);

  return had_err;
}
コード例 #16
0
ファイル: shredder.c プロジェクト: simongog/genometools
/*
 * Returns the next fragment of the sequences handled by <shredder> or NULL if
 * all sequences have been processed.
 *
 * The fragment length is shredder->minlength plus a random amount obtained
 * from gt_rand_max(maxlength - minlength) (nothing is added if both limits are
 * equal), cut down so that the fragment does not exceed the end of the current
 * sequence. The length is stored in <*fragment_length> and <desc> is set to
 * the description of the current sequence. Afterwards the shredder is
 * advanced: to the start of the next sequence if the fragment reached the
 * sequence end, otherwise by the fragment length minus shredder->overlap, but
 * by at least one position.
 */
static char* generate_fragment(GtShredder *shredder,
                               unsigned long *fragment_length,
                               GtStr *desc)
{
  unsigned long seqlen, fraglen, random_part = 0;
  char *fragment;

  gt_assert(shredder && fragment_length);

  if (!(shredder->seqnum < gt_bioseq_number_of_sequences(shredder->bioseq)))
    return NULL; /* no sequence left */

  seqlen = gt_bioseq_get_sequence_length(shredder->bioseq, shredder->seqnum);

  /* draw the fragment length */
  if (shredder->maxlength != shredder->minlength)
    random_part = gt_rand_max(shredder->maxlength - shredder->minlength);
  fraglen = random_part + shredder->minlength;
  gt_assert(fraglen >= shredder->minlength);

  /* do not run past the end of the sequence */
  if (shredder->pos + fraglen > seqlen)
    fraglen = seqlen - shredder->pos;
  *fragment_length = fraglen;

  gt_str_reset(desc);
  gt_str_append_cstr(desc, gt_bioseq_get_description(shredder->bioseq,
                                                     shredder->seqnum));

  gt_assert(shredder->pos + fraglen <= seqlen);
  fragment = gt_bioseq_get_sequence_range(shredder->bioseq, shredder->seqnum,
                                          shredder->pos,
                                          shredder->pos + fraglen -1);

  /* advance the shredder */
  if (shredder->pos + fraglen == seqlen) {
    /* this was the last fragment of the sequence */
    shredder->seqnum++;
    shredder->pos = 0;
  }
  else if (fraglen > shredder->overlap)
    shredder->pos += fraglen - shredder->overlap;
  else
    shredder->pos++; /* move on by at least one base in each step */

  return fragment;
}
コード例 #17
0
ファイル: gt_sequniq.c プロジェクト: AnnSeidel/genometools
/*
 * Tool runner of sequniq: writes every sequence of the input files whose MD5
 * fingerprint has not been seen before as FASTA entry to arguments->outfp and
 * reports the number of removed duplicates on stderr.
 *
 * Depending on arguments->seqit the input files are either read one by one as
 * GtBioseq objects or streamed with a GtSeqIterator (optionally showing a
 * progress bar if arguments->verbose is set).
 *
 * Returns 0 on success and -1 on error (no statistics are shown then).
 */
static int gt_sequniq_runner(int argc, const char **argv, int parsed_args,
                             void *tool_arguments, GtError *err)
{
  GtSequniqArguments *arguments = tool_arguments;
  GtUint64 duplicates = 0, num_of_sequences = 0;
  int i, had_err = 0;
  GtMD5Set *md5set;

  gt_error_check(err);
  gt_assert(arguments);
  md5set = gt_md5set_new(arguments->nofseqs);
  if (!arguments->seqit) {
    GtUword j;
    GtBioseq *bs;

    for (i = parsed_args; !had_err && i < argc; i++) {
      if (!(bs = gt_bioseq_new(argv[i], err)))
        had_err = -1;
      if (!had_err) {
        GtMD5SetStatus retval;
        for (j = 0; j < gt_bioseq_number_of_sequences(bs) && !had_err; j++) {
          char *seq = gt_bioseq_get_sequence(bs, j);
          retval = gt_md5set_add_sequence(md5set, seq,
                                          gt_bioseq_get_sequence_length(bs, j),
                                          arguments->rev, err);
          if (retval == GT_MD5SET_NOT_FOUND)
            gt_fasta_show_entry(gt_bioseq_get_description(bs, j), seq,
                                gt_bioseq_get_sequence_length(bs, j),
                                arguments->width, arguments->outfp);
          else if (retval != GT_MD5SET_ERROR)
            duplicates++;
          else
            had_err = -1;
          num_of_sequences++;
          gt_free(seq);
        }
        gt_bioseq_delete(bs);
      }
    }
  }
  else {
    GtSeqIterator *seqit;
    GtStrArray *files;
    off_t totalsize;
    const GtUchar *sequence;
    char *desc;
    GtUword len;

    files = gt_str_array_new();
    for (i = parsed_args; i < argc; i++)
      gt_str_array_add_cstr(files, argv[i]);
    totalsize = gt_files_estimate_total_size(files);
    seqit = gt_seq_iterator_sequence_buffer_new(files, err);
    if (!seqit)
      had_err = -1;
    if (!had_err) {
      if (arguments->verbose) {
        gt_progressbar_start(gt_seq_iterator_getcurrentcounter(seqit,
                                                          (GtUint64) totalsize),
                             (GtUint64) totalsize);
      }
      while (!had_err) {
        GtMD5SetStatus retval;
        int rval = gt_seq_iterator_next(seqit, &sequence, &len, &desc, err);
        if (rval != 1) {
          /* a negative value signals a read error and must not be reported
             as success; anything else ends the iteration regularly
             NOTE(review): assumes the iterator sets <err> on failure,
             confirm against the GtSeqIterator documentation */
          if (rval < 0)
            had_err = -1;
          break;
        }

        retval = gt_md5set_add_sequence(md5set, (const char*) sequence, len,
                                        arguments->rev, err);
        if (retval == GT_MD5SET_NOT_FOUND)
          gt_fasta_show_entry(desc, (const char*) sequence, len,
                              arguments->width, arguments->outfp);
        else if (retval != GT_MD5SET_ERROR)
          duplicates++;
        else
          had_err = -1;
        num_of_sequences++;
      }
      if (arguments->verbose)
        gt_progressbar_stop();
      gt_seq_iterator_delete(seqit);
    }
    gt_str_array_delete(files);
  }

  /* show statistics */
  if (!had_err) {
    /* without any input sequence the ratio would be 0/0 and "nan" would be
       printed, report 0% instead */
    double removed_percent = num_of_sequences
                             ? ((double) duplicates /
                                (double) num_of_sequences) * 100.0
                             : 0.0;
    fprintf(stderr,
            "# "GT_WU" out of "GT_WU" sequences have been removed (%.3f%%)\n",
            (GtUword)duplicates, (GtUword)num_of_sequences,
            removed_percent);
  }

  gt_md5set_delete(md5set);
  return had_err;
}
コード例 #18
0
ファイル: gt_sketch_page.c プロジェクト: 9beckert/TIR
/*
 * Tool runner of sketch_page: draws the features of a GFF3 file onto one or
 * more pages of a PDF or PostScript document.
 *
 * argv[parsed_args] is the name of the output file, argv[parsed_args+1] the
 * GFF3 input file. The displayed range is cut into slices of
 * arguments->width; for each slice a diagram is laid out and sketched below
 * the previous one, a new page is started whenever the next slice does not fit
 * onto the current page. Every page gets a header drawn by draw_header().
 *
 * Returns 0 on success and a non-zero error code otherwise. The feature
 * index, the style and the style file name are released on every path.
 */
static int gt_sketch_page_runner(GT_UNUSED int argc,
                                 const char **argv,
                                 int parsed_args,
                                 void *tool_arguments,
                                 GtError *err)
{
  SketchPageArguments *arguments = tool_arguments;
  int had_err = 0;
  GtFeatureIndex *features = NULL;
  GtRange qry_range, sequence_region_range;
  GtStyle *sty = NULL;
  GtStr *prog, *gt_style_file = NULL;
  GtBioseq *bioseq = NULL;
  const char *seqid = NULL, *outfile;
  unsigned long start, height, num_pages = 0;
  double offsetpos, usable_height;
  cairo_surface_t *surf = NULL;
  cairo_t *cr = NULL;
  GtTextWidthCalculator *twc;
  gt_error_check(err);

  features = gt_feature_index_memory_new();

  if (cairo_version() < CAIRO_VERSION_ENCODE(1, 8, 6))
    gt_warning("Your cairo library (version %s) is older than version 1.8.6! "
               "These versions contain a bug which may result in "
               "corrupted PDF output!", cairo_version_string());

  /* get style
     NOTE(review): assumes gt_style_new() and gt_get_gtdata_path() return NULL
     with <err> set on failure -- confirm against their documentation */
  sty = gt_style_new(err);
  if (!sty)
    had_err = -1;
  if (!had_err)
  {
    if (gt_str_length(arguments->stylefile) == 0)
    {
      prog = gt_str_new();
      gt_str_append_cstr_nt(prog, argv[0],
                            gt_cstr_length_up_to_char(argv[0], ' '));
      gt_style_file = gt_get_gtdata_path(gt_str_get(prog), err);
      gt_str_delete(prog);
      if (gt_style_file)
        gt_str_append_cstr(gt_style_file, "/sketch/default.style");
      else
        had_err = -1;
    }
    else
    {
      gt_style_file = gt_str_ref(arguments->stylefile);
    }
  }
  if (!had_err)
    had_err = gt_style_load_file(sty, gt_str_get(gt_style_file), err);

  outfile = argv[parsed_args];
  if (!had_err)
  {
    /* get features */
    had_err = gt_feature_index_add_gff3file(features, argv[parsed_args+1], err);
     if (!had_err && gt_str_length(arguments->seqid) == 0) {
      seqid = gt_feature_index_get_first_seqid(features);
      if (seqid == NULL)
      {
        gt_error_set(err, "GFF input file must contain a sequence region!");
        had_err = -1;
      }
    }
    else if (!had_err
               && !gt_feature_index_has_seqid(features,
                                              gt_str_get(arguments->seqid)))
    {
      gt_error_set(err, "sequence region '%s' does not exist in GFF input file",
                   gt_str_get(arguments->seqid));
      had_err = -1;
    }
    else if (!had_err)
      seqid = gt_str_get(arguments->seqid);
  }

  /* set text */
  if (gt_str_length(arguments->text) == 0)
  {
    gt_str_delete(arguments->text);
    arguments->text = gt_str_new_cstr(argv[parsed_args+1]);
  }

  if (!had_err)
  {
    /* set display range */
    gt_feature_index_get_range_for_seqid(features, &sequence_region_range,
                                         seqid);
    qry_range.start = (arguments->range.start == GT_UNDEF_ULONG ?
                         sequence_region_range.start :
                         arguments->range.start);
    qry_range.end   = (arguments->range.end == GT_UNDEF_ULONG ?
                         sequence_region_range.end :
                         arguments->range.end);

    /* set output format
       NOTE(review): <surf> stays NULL for any other format string;
       presumably the option parser only admits "pdf" and "ps" -- verify */
    if (strcmp(gt_str_get(arguments->format), "pdf") == 0)
    {
      surf = cairo_pdf_surface_create(outfile,
                                      mm_to_pt(arguments->pwidth),
                                      mm_to_pt(arguments->pheight));
    }
    else if (strcmp(gt_str_get(arguments->format), "ps") == 0)
    {
      surf =  cairo_ps_surface_create(outfile,
                                      mm_to_pt(arguments->pwidth),
                                      mm_to_pt(arguments->pheight));
    }
    gt_log_log("created page with %.2f:%.2f dimensions\n",
                                                  mm_to_pt(arguments->pwidth),
                                                  mm_to_pt(arguments->pheight));

    offsetpos = TEXT_SPACER + arguments->theight + TEXT_SPACER;
    usable_height = mm_to_pt(arguments->pheight)
                              - arguments->theight
                              - arguments->theight
                              - 4*TEXT_SPACER;

    if (gt_str_length(arguments->seqfile) > 0) {
      bioseq = gt_bioseq_new(gt_str_get(arguments->seqfile), err);
    }

    cr = cairo_create(surf);
    cairo_set_font_size(cr, 8);
    twc = gt_text_width_calculator_cairo_new(cr, sty);
    for (start = qry_range.start; start <= qry_range.end;
         start += arguments->width)
    {
      GtRange single_range;
      GtDiagram *d;
      GtLayout *l;
      /* per-slice objects start out as NULL in every iteration so that the
         cleanup below never deletes an object of a previous slice again */
      GtCanvas *canvas = NULL;
      GtCustomTrack *ct = NULL;
      const char *seq;
      single_range.start = start;
      single_range.end = start + arguments->width;

      if (had_err)
        break;

      d = gt_diagram_new(features, seqid, &single_range, sty, err);
      if (!d) {
        had_err = -1;
        break;
      }
      if (bioseq) {
        seq = gt_bioseq_get_sequence(bioseq, 0);
        ct = gt_custom_track_gc_content_new(seq,
                                      gt_bioseq_get_sequence_length(bioseq, 0),
                                      800, 70, 0.4, true);
        gt_diagram_add_custom_track(d, ct);
      }

      l = gt_layout_new_with_twc(d, mm_to_pt(arguments->width), sty, twc, err);
      if (!l)
        had_err = -1; /* do not query the height of a missing layout */
      else
        had_err = gt_layout_get_height(l, &height, err);
      if (!had_err) {
        /* start a new page if this slice does not fit onto the current one */
        if (gt_double_smaller_double(usable_height - 10 - 2*TEXT_SPACER
              - arguments->theight, offsetpos + height))
        {
            draw_header(cr, gt_str_get(arguments->text), argv[parsed_args+1],
                        seqid, num_pages, mm_to_pt(arguments->pwidth),
                        mm_to_pt(arguments->pheight),
                        arguments->theight);
          cairo_show_page(cr);
          offsetpos = TEXT_SPACER + arguments->theight + TEXT_SPACER;
          num_pages++;
        }
        canvas = gt_canvas_cairo_context_new(sty,
                                             cr,
                                             offsetpos,
                                             mm_to_pt(arguments->pwidth),
                                             height,
                                             NULL,
                                             err);
        if (!canvas)
          had_err = -1;
        offsetpos += height;
        if (!had_err)
          had_err = gt_layout_sketch(l, canvas, err);
      }
      gt_canvas_delete(canvas);
      gt_layout_delete(l);
      gt_diagram_delete(d);
      if (ct)
        gt_custom_track_delete(ct);
    }
    draw_header(cr, gt_str_get(arguments->text), argv[parsed_args+1], seqid,
                num_pages, mm_to_pt(arguments->pwidth),
                mm_to_pt(arguments->pheight),
                arguments->theight);
    cairo_show_page(cr);
    num_pages++;
    gt_log_log("finished, should be %lu pages\n", num_pages);
    gt_text_width_calculator_delete(twc);
    cairo_destroy(cr);
    cairo_surface_flush(surf);
    cairo_surface_finish(surf);
    cairo_surface_destroy(surf);
    cairo_debug_reset_static_data();
    if (bioseq)
      gt_bioseq_delete(bioseq);
  }

  /* released on the error paths as well (formerly only on success)
     NOTE(review): relies on these delete functions accepting NULL -- confirm */
  gt_style_delete(sty);
  gt_str_delete(gt_style_file);
  gt_feature_index_delete(features);

  return had_err;
}
コード例 #19
0
ファイル: bssm_param.c プロジェクト: potter-s/genometools
/* Updates the BSSM parameterization: fills the hypo7table of <bssm_model> for
   hypothesis <hypothesisnum> with frequencies counted on the sequences of
   <bioseq>. Every sequence is expected to have length STRINGSIZE.

   Slot 0 receives the relative frequencies of the characters at the first
   position, slot k (k >= 1) the ratio of the dinucleotide frequency at
   positions k-1/k and the mononucleotide frequency at position k-1 (NULLPROB
   if the latter is 0). Rows containing NULLPROB are smoothed afterwards. */
static void build_bssm(GtBioseq *bioseq, GthBSSMModel *bssm_model,
                       unsigned int hypothesisnum)
{
    GtUword mono_ct[STRINGSIZE-1][ALPHSIZE];          /* mononucleotide counts */
    GtUword di_ct[STRINGSIZE-1][ALPHSIZE][ALPHSIZE];  /* dinucleotide counts */
    GtUword pos, seqidx, from, to;
    GtUword seqlen, buflen = 0;
    GtUword num_entries = gt_bioseq_number_of_sequences(bioseq);
    GtUchar *encoded_seq = NULL;

    /* initialize the counters */
    for (pos = 0; pos < (STRINGSIZE-1); pos++) {
        for (from = 0; from < ALPHSIZE; from++) {
            mono_ct[pos][from] = INITVAL_INT;
            for (to = 0; to < ALPHSIZE; to++)
                di_ct[pos][from][to] = INITVAL_INT;
        }
    }

    /* count mononucleotides */
    for (seqidx = 0; seqidx < num_entries; seqidx++) {
        seqlen = gt_bioseq_get_sequence_length(bioseq, seqidx);
        gt_assert(seqlen == STRINGSIZE);
        if (seqlen > buflen) {
            /* grow the buffer for the encoded sequence */
            encoded_seq = gt_realloc(encoded_seq, seqlen);
            buflen = seqlen;
        }
        gt_bioseq_get_encoded_sequence(bioseq, encoded_seq, seqidx);
        for (pos = 0; pos < (STRINGSIZE-1); pos++) {
            gt_assert(encoded_seq[pos] < ALPHSIZE);
            mono_ct[pos][encoded_seq[pos]]++;
        }
    }

    /* count dinucleotides */
    for (seqidx = 0; seqidx < num_entries; seqidx++) {
        seqlen = gt_bioseq_get_sequence_length(bioseq, seqidx);
        gt_assert(seqlen == STRINGSIZE);
        if (seqlen > buflen) {
            encoded_seq = gt_realloc(encoded_seq, seqlen);
            buflen = seqlen;
        }
        gt_bioseq_get_encoded_sequence(bioseq, encoded_seq, seqidx);
        for (pos = 0; pos < (STRINGSIZE-1); pos++)
            di_ct[pos][encoded_seq[pos]][encoded_seq[pos + 1]]++;
    }

    gt_free(encoded_seq);

    /* record equilibrium frequencies (first "slot" of the transition
       frequencies) */
    for (from = 0; from < ALPHSIZE; from++) {
        for (to = 0; to < ALPHSIZE; to++) {
            bssm_model->hypotables.hypo7table[hypothesisnum][0][from][to] =
                (GthFlt) mono_ct[0][from] / num_entries;
        }
    }

    /* populate the remaining transition frequencies */
    for (pos = 1; pos < STRINGSIZE; pos++) {
        for (from = 0; from < ALPHSIZE; from++) {
            bool row_has_nullprob = false;
            double mono_freq = (double) mono_ct[pos-1][from] / num_entries;

            for (to = 0; to < ALPHSIZE; to++) {
                double di_freq = (double) di_ct[pos-1][from][to] / num_entries;
                if (mono_freq == 0.0) {
                    bssm_model->hypotables
                    .hypo7table[hypothesisnum][pos][from][to] =
                        (GthFlt) NULLPROB;
                }
                else {
                    bssm_model->hypotables
                    .hypo7table[hypothesisnum][pos][from][to] =
                        (GthFlt) (di_freq / mono_freq);
                }
            }

            /* Smooth rows which contain a NULLPROB entry: such entries are
               replaced by PSEUDOPROB, all other entries p of the row by
               p * (1 - 4 * PSEUDOPROB) + PSEUDOPROB. Rows without a NULLPROB
               entry stay untouched. */
            for (to = 0; to < ALPHSIZE; to++) {
                if (bssm_model->hypotables
                        .hypo7table[hypothesisnum][pos][from][to] == NULLPROB) {
                    row_has_nullprob = true;
                    break;
                }
            }
            if (!row_has_nullprob)
                continue;

            for (to = 0; to < ALPHSIZE; to++) {
                if (bssm_model->hypotables
                        .hypo7table[hypothesisnum][pos][from][to] == NULLPROB) {
                    bssm_model->hypotables
                    .hypo7table[hypothesisnum][pos][from][to] =
                        (GthFlt) PSEUDOPROB;
                }
                else {
                    bssm_model->hypotables
                    .hypo7table[hypothesisnum][pos][from][to] =
                        (GthFlt)
                        (bssm_model->hypotables
                         .hypo7table[hypothesisnum][pos][from][to] *
                         (1 - (4 * PSEUDOPROB)) + PSEUDOPROB);
                }
            }
        }
    }
}
コード例 #20
0
ファイル: bssm_param.c プロジェクト: potter-s/genometools
/*
 * Parameterizes the model of <bssm_param> selected by <termtype> from the
 * training files found below <path>.
 *
 * The version number is set, the selected model is marked as set and its
 * window sizes are initialized. Then each of the NUMOFFILES training files
 * (<path>/<subdirectory of the termtype>/<filenames[i]>, with ".gz" appended
 * if <gzip> is set) is read and validated: every sequence must have length
 * STRINGSIZE and carry the dinucleotide of the termtype (GT, GC or AG) at
 * positions 50 and 51 (0-based). Valid files are passed to build_bssm() with
 * the file index as hypothesis number.
 *
 * Returns 0 on success and -1 (with <err> set) on the first failure.
 */
int gth_bssm_param_parameterize(GthBSSMParam *bssm_param, const char *path,
                                Termtype termtype, bool gzip, GtError *err)
{
    GtAlphabet *alphabet = NULL;
    GtBioseq *bioseq;
    GtStr *file2proc;
    GtUword i, j;
    int had_err = 0;
    gt_error_check(err);

    file2proc = gt_str_new();

    /* set version number */
    bssm_param->version_num = (unsigned char) MYVERSION;

    /* set model to true and set window sizes */
    switch (termtype) {
    case GT_DONOR_TYPE:
        bssm_param->gt_donor_model_set = true;
        set_window_sizes_in_Bssmmodel(&bssm_param->gt_donor_model);
        break;
    case GC_DONOR_TYPE:
        bssm_param->gc_donor_model_set = true;
        set_window_sizes_in_Bssmmodel(&bssm_param->gc_donor_model);
        break;
    case AG_ACCEPTOR_TYPE:
        bssm_param->ag_acceptor_model_set = true;
        set_window_sizes_in_Bssmmodel(&bssm_param->ag_acceptor_model);
        break;
    default:
        gt_assert(0);
    }

    for (i = 0; !had_err && i < NUMOFFILES; i++) {
        /* process datafile */
        gt_str_append_cstr(file2proc, path);
        switch (termtype) {
        case GT_DONOR_TYPE:
            gt_str_append_cstr(file2proc, "/GT_donor/");
            gt_str_append_cstr(file2proc, filenames[i]);
            break;
        case GC_DONOR_TYPE:
            gt_str_append_cstr(file2proc, "/GC_donor/");
            gt_str_append_cstr(file2proc, filenames[i]);
            break;
        case AG_ACCEPTOR_TYPE:
            gt_str_append_cstr(file2proc, "/AG_acceptor/");
            gt_str_append_cstr(file2proc, filenames[i]);
            break;
        default:
            gt_assert(0);
        }

        if (gzip)
            gt_str_append_cstr(file2proc, ".gz");

        if (!(bioseq = gt_bioseq_new(gt_str_get(file2proc), err)))
            had_err = -1;

        if (!had_err)
            alphabet = gt_bioseq_get_alphabet(bioseq);

        /* check here if all sequences have the length 102 and correct bases at
           positions 51 and 52 (i.e., GT, GC, or AG) */
        for (j = 0; !had_err && j < gt_bioseq_number_of_sequences(bioseq); j++) {
            /* check length */
            if (gt_bioseq_get_sequence_length(bioseq, j) != STRINGSIZE) {
                gt_error_set(err,
                             "sequence "GT_WU" in file \"%s\" does not have length %u",
                             j, gt_str_get(file2proc), STRINGSIZE);
                had_err = -1;
            }
            if (!had_err) {
                /* the bases are only read after the length check has passed,
                   otherwise positions 50 and 51 may lie outside of a too
                   short sequence */
                GtUchar encoded_seq[2];
                encoded_seq[0] = gt_bioseq_get_encoded_char(bioseq, j, 50);
                encoded_seq[1] = gt_bioseq_get_encoded_char(bioseq, j, 51);
                /* check base correctness */
                switch (termtype) {
                case GT_DONOR_TYPE:
                    if (encoded_seq[0] != gt_alphabet_encode(alphabet, 'G') ||
                            encoded_seq[1] != gt_alphabet_encode(alphabet, 'T')) {
                        gt_error_set(err, "sequence "GT_WU" in file \"%s\" is not a GT "
                                     "sequence", j, gt_str_get(file2proc));
                        had_err = -1;
                    }
                    break;
                case GC_DONOR_TYPE:
                    if (encoded_seq[0] != gt_alphabet_encode(alphabet, 'G') ||
                            encoded_seq[1] != gt_alphabet_encode(alphabet, 'C')) {
                        gt_error_set(err, "sequence "GT_WU" in file \"%s\" is not a GC "
                                     "sequence", j, gt_str_get(file2proc));
                        had_err = -1;
                    }
                    break;
                case AG_ACCEPTOR_TYPE:
                    if (encoded_seq[0] != gt_alphabet_encode(alphabet, 'A') ||
                            encoded_seq[1] != gt_alphabet_encode(alphabet, 'G')) {
                        gt_error_set(err, "sequence "GT_WU" in file \"%s\" is not a AG "
                                     "sequence", j, gt_str_get(file2proc));
                        had_err = -1;
                    }
                    break;
                default:
                    gt_assert(0);
                }
            }
        }

        /* only validated files contribute to the model */
        if (!had_err) {
            switch (termtype) {
            case GT_DONOR_TYPE:
                build_bssm(bioseq, &bssm_param->gt_donor_model, i);
                break;
            case GC_DONOR_TYPE:
                build_bssm(bioseq, &bssm_param->gc_donor_model, i);
                break;
            case AG_ACCEPTOR_TYPE:
                build_bssm(bioseq, &bssm_param->ag_acceptor_model, i);
                break;
            default:
                gt_assert(0);
            }
        }

        /* reset */
        gt_str_reset(file2proc);

        /* free space */
        gt_bioseq_delete(bioseq);
    }
    gt_str_delete(file2proc);

    return had_err;
}
コード例 #21
0
ファイル: gt_seqfilter.c プロジェクト: ggonnella/genometools
/* Runner of the sequence filter tool.

   Iterates over the sequence collections opened from the command line
   arguments that follow the already parsed options (<argv> + <parsed_args>)
   and writes every sequence that passes all filter criteria stored in
   <tool_arguments> (step, sample probability, minimum and maximum length) in
   FASTA format to <arguments->outfp>. As soon as <arguments->maxseqnum>
   sequences have passed, no further sequence is examined; all remaining
   ones are counted as filtered. A summary line is written to stderr if no
   error occurred.

   Returns the error status delivered by the bioseq iterator (0 on success);
   in the error case <err> has been handed to the iterator for reporting. */
static int gt_seqfilter_runner(int argc, const char **argv, int parsed_args,
                               void *tool_arguments, GtError *err)
{
  SeqFilterArguments *arguments = tool_arguments;
  GtBioseqIterator *bsi;
  GtBioseq *bioseq;
  GtUint64 passed = 0, filtered = 0, num_of_sequences = 0, steps = 0;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(tool_arguments);

  bsi = gt_bioseq_iterator_new(argc - parsed_args, argv + parsed_args);

  while (!(had_err = gt_bioseq_iterator_next(bsi, &bioseq, err)) &&
         bioseq != NULL) {
    GtUword i;
    GtUint64 current_num = gt_bioseq_number_of_sequences(bioseq);
    /* the loop condition also enforces the maxseqnum limit, so <i> may stop
       short of <current_num> */
    for (i = 0;
         i < current_num &&
         (arguments->maxseqnum == GT_UNDEF_UWORD ||
          passed + 1 <= arguments->maxseqnum);
         i++) {
      char *seq;
      /* the order of the tests matters: gt_rand_0_to_1() is only called for
         sequences selected by the step criterion */
      if ((arguments->step == 1 ||
           steps + 1 == arguments->step) &&
          (arguments->sample_prob == 1.0 ||
           gt_rand_0_to_1() <= arguments->sample_prob) &&
          (arguments->minlength == GT_UNDEF_UWORD ||
           gt_bioseq_get_sequence_length(bioseq, i) >= arguments->minlength) &&
          (arguments->maxlength == GT_UNDEF_UWORD ||
           gt_bioseq_get_sequence_length(bioseq, i) <= arguments->maxlength)) {
        seq = gt_bioseq_get_sequence(bioseq, i);
        gt_fasta_show_entry(gt_bioseq_get_description(bioseq, i),
                            seq,
                            gt_bioseq_get_sequence_length(bioseq, i),
                            arguments->width, arguments->outfp);
        gt_free(seq);
        passed++;
      }
      else {
        filtered++;
      }
      /* cyclic counter: wraps to 0 each time it reaches <step> */
      steps = (steps + 1 == arguments->step) ? 0 : steps + 1;
    }
    /* sequences skipped because the maxseqnum limit was hit count as
       filtered */
    filtered += current_num - i;
    num_of_sequences += current_num;
    gt_bioseq_delete(bioseq);
  }

  /* show statistics */
  if (!had_err) {
    double removed_percent = 0.0;
    gt_assert(passed + filtered == num_of_sequences);
    /* avoid 0/0 (which would be printed as NaN) if no sequence was read */
    if (num_of_sequences > 0)
      removed_percent = ((double) filtered / num_of_sequences) * 100.0;
    fprintf(stderr, "# " GT_LLU " out of " GT_LLU
            " sequences have been removed (%.3f%%)\n",
            filtered, num_of_sequences, removed_percent);
  }

  gt_bioseq_iterator_delete(bsi);

  return had_err;
}
コード例 #22
0
int main(int argc, char *argv[])
{
  const char *style_file, *png_file, *gff3_file;
  char *seqid;
  GtStyle *style;
  GtBioseq *bioseq;
  GtFeatureIndex *feature_index;
  GtRange range;
  GtDiagram *diagram;
  GtLayout *layout;
  GtCanvas *canvas;
  GtCustomTrack *custom;
  GtUword height, windowsize;
  GtError *err;

  if (argc != 9) {
    fprintf(stderr, "Usage: %s style_file PNG_file GFF3_file Seq_file seqid"
                    " start end windowsize\n",
                    argv[0]);
    return EXIT_FAILURE;
  }

  style_file = argv[1];
  png_file = argv[2];
  gff3_file = argv[3];

  /* initialize */
  gt_lib_init();

  /* create error object */
  err = gt_error_new();

  /* create style */
  if (!(style = gt_style_new(err)))
    handle_error(err);

  /* load style file */
  if (gt_style_load_file(style, style_file, err))
    handle_error(err);

  /* create feature index */
  feature_index = gt_feature_index_memory_new();

  /* add GFF3 file to index */
  if (gt_feature_index_add_gff3file(feature_index, gff3_file, err))
    handle_error(err);

  /* create diagram for first sequence ID in feature index */
  seqid = argv[5];
  if (gt_feature_index_get_range_for_seqid(feature_index, &range, seqid, err))
    handle_error(err);
  sscanf(argv[6], "%lu", &range.start);
  sscanf(argv[7], "%lu", &range.end);
  sscanf(argv[8], "%lu", &windowsize);

  diagram = gt_diagram_new(feature_index, seqid, &range, style, err);
  if (gt_error_is_set(err))
    handle_error(err);

  /* load sequence for GC plot */
  bioseq = gt_bioseq_new(argv[4], err);
  if (gt_error_is_set(err))
    handle_error(err);

  /* create custom track with GC plot for first sequence in file,
     window size 1000, 40px height and average line at 16.5% */
  custom = gt_custom_track_gc_content_new(gt_bioseq_get_sequence(bioseq, 0),
                                          gt_bioseq_get_sequence_length(bioseq,
                                                                        0),
                                          windowsize,
                                          70,
                                          0.165,
                                          true);
  gt_diagram_add_custom_track(diagram, custom);

  /* create layout with given width, determine resulting image height */
  layout = gt_layout_new(diagram, 600, style, err);
  if (gt_error_is_set(err))
    handle_error(err);
  if (gt_layout_get_height(layout, &height, err))
    handle_error(err);

  /* create PNG canvas */
  canvas = gt_canvas_cairo_file_new(style, GT_GRAPHICS_PNG, 600, height,
                                    NULL, err);
  if (!canvas)
    handle_error(err);

  /* sketch layout on canvas */
  if (gt_layout_sketch(layout, canvas, err))
    handle_error(err);

  /* write canvas to file */
  if (gt_canvas_cairo_file_to_file((GtCanvasCairoFile*) canvas, png_file, err))
    handle_error(err);

  /* free */
  gt_custom_track_delete(custom);
  gt_bioseq_delete(bioseq);
  gt_canvas_delete(canvas);
  gt_layout_delete(layout);
  gt_diagram_delete(diagram);
  gt_feature_index_delete(feature_index);
  gt_style_delete(style);
  gt_error_delete(err);

  /* perform static data cleanup */
  gt_lib_clean();
  return EXIT_SUCCESS;
}