Exemplo n.º 1
0
/* Return the next fragment that survives the sampling step, or NULL as soon
   as generate_fragment() returns NULL. <shredder>, <fragment_length> and
   <desc> are forwarded unchanged to generate_fragment(). Fragments rejected by
   sampling are released with gt_free() before the next one is generated.
   NOTE(review): the returned fragment is presumably owned by the caller (the
   rejected ones are freed here with gt_free()) -- confirm against the
   header. */
char* gt_shredder_shred(GtShredder *shredder, unsigned long *fragment_length,
                        GtStr *desc)
{
  gt_assert(shredder && fragment_length);
  for (;;) {
    char *candidate = generate_fragment(shredder, fragment_length, desc);
    if (candidate == NULL)
      break;
    /* a probability of exactly 1.0 accepts the fragment without drawing a
       random number; otherwise one draw decides */
    if (shredder->sample_probability != 1.0 &&
        !(gt_rand_0_to_1() <= shredder->sample_probability)) {
      gt_free(candidate);
      continue;
    }
    return candidate;
  }
  return NULL;
}
Exemplo n.º 2
0
/* Tool runner: iterate over the sequence files given in <argv> (starting at
   index <parsed_args>), write every sequence that passes all configured
   filters as a FASTA entry to arguments->outfp, and finally report on stderr
   how many sequences were removed.

   A sequence is written only if all of the following hold, checked in this
   order (later checks are skipped once one fails):
   - step:        arguments->step is 1, or this is the step-th sequence since
                  the step counter was last reset;
   - sampling:    arguments->sample_prob is 1.0, or a random draw from
                  gt_rand_0_to_1() is <= arguments->sample_prob;
   - min. length: arguments->minlength is GT_UNDEF_UWORD or the sequence is at
                  least that long;
   - max. length: arguments->maxlength is GT_UNDEF_UWORD or the sequence is at
                  most that long.
   Once arguments->maxseqnum sequences have been written (unless it is
   GT_UNDEF_UWORD), no further sequences are examined; they are only counted
   as removed.

   <tool_arguments> must point to a SeqFilterArguments and must not be NULL.
   Returns the error code of gt_bioseq_iterator_next() (0 if no error
   occurred); the statistics line is only printed in the error-free case. */
static int gt_seqfilter_runner(int argc, const char **argv, int parsed_args,
                               void *tool_arguments, GtError *err)
{
  SeqFilterArguments *arguments = tool_arguments;
  GtBioseqIterator *bsi;
  GtBioseq *bioseq;
  GtUint64 passed = 0, filtered = 0, num_of_sequences = 0, steps = 0;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(tool_arguments);

  bsi = gt_bioseq_iterator_new(argc - parsed_args, argv + parsed_args);

  /* the iterator signals the end of the input by handing out a NULL bioseq */
  while (!(had_err = gt_bioseq_iterator_next(bsi, &bioseq, err)) &&
         bioseq != NULL) {
    GtUword i;
    GtUint64 current_num = gt_bioseq_number_of_sequences(bioseq);
    /* stop examining sequences as soon as the output limit is reached */
    for (i = 0;
         i < current_num &&
         (arguments->maxseqnum == GT_UNDEF_UWORD ||
          passed + 1 <= arguments->maxseqnum);
         i++) {
      char *seq;
      /* the order of the checks matters: a random number is only drawn for
         sequences which already passed the step check */
      if ((arguments->step == 1 ||
           steps + 1 == arguments->step) &&
          (arguments->sample_prob == 1.0 ||
           gt_rand_0_to_1() <= arguments->sample_prob) &&
          (arguments->minlength == GT_UNDEF_UWORD ||
           gt_bioseq_get_sequence_length(bioseq, i) >= arguments->minlength) &&
          (arguments->maxlength == GT_UNDEF_UWORD ||
           gt_bioseq_get_sequence_length(bioseq, i) <= arguments->maxlength)) {
        seq = gt_bioseq_get_sequence(bioseq, i);
        gt_fasta_show_entry(gt_bioseq_get_description(bioseq, i),
                            seq,
                            gt_bioseq_get_sequence_length(bioseq, i),
                            arguments->width, arguments->outfp);
        gt_free(seq);
        passed++;
      }
      else {
        filtered++;
      }
      /* cyclic counter: wraps to 0 after every step-th examined sequence */
      steps = (steps + 1 == arguments->step) ? 0 : steps + 1;
    }
    /* sequences skipped because the output limit was hit count as removed */
    filtered += current_num - i;
    num_of_sequences += current_num;
    gt_bioseq_delete(bioseq);
  }

  /* show statistics */
  if (!had_err) {
    double removed_percent = 0.0;
    gt_assert(passed + filtered == num_of_sequences);
    /* with no input sequences the ratio would be 0/0 and print as NaN;
       report 0% instead */
    if (num_of_sequences > 0)
      removed_percent = ((double) filtered / num_of_sequences) * 100.0;
    fprintf(stderr, "# " GT_LLU " out of " GT_LLU
            " sequences have been removed (%.3f%%)\n",
            filtered, num_of_sequences, removed_percent);
  }

  gt_bioseq_iterator_delete(bsi);

  return had_err;
}