Exemplo n.º 1
0
static GtStr* region_mapping_map(GtRegionMapping *rm,
                                 const char *sequence_region, GtError *err)
{
  gt_error_check(err);
  gt_assert(rm && sequence_region);
  if (!rm->mapping)
    return gt_str_ref(gt_str_array_get_str(rm->sequence_filenames, 0));
  else
    return gt_mapping_map_string(rm->mapping, sequence_region, err);
}
static int m2i_change_target_seqids(GtFeatureNode *fn, const char *target,
                                    GtRegionMapping *region_mapping,
                                    GtError *err)
{
  GtStrArray *target_ids;
  GtArray *target_ranges, *target_strands;
  GtStr *desc, *new_seqid;
  unsigned long i;
  int had_err;
  gt_error_check(err);
  gt_assert(fn && target && region_mapping);
  target_ids = gt_str_array_new();
  target_ranges = gt_array_new(sizeof (GtRange));
  target_strands = gt_array_new(sizeof (GtStrand));
  desc = gt_str_new();
  new_seqid = gt_str_new();
  had_err = gt_gff3_parser_parse_all_target_attributes(target, false,
                                                       target_ids,
                                                       target_ranges,
                                                       target_strands, "", 0,
                                                       err);
  for (i = 0; !had_err && i < gt_str_array_size(target_ids); i++) {
    GtStr *seqid;
    gt_str_reset(desc);
    gt_str_reset(new_seqid);
    seqid = gt_str_array_get_str(target_ids, i);
    had_err = gt_region_mapping_get_description(region_mapping, desc, seqid,
                                                err);
    if (!had_err)
      gt_regular_seqid_save(new_seqid, desc);
      gt_str_array_set(target_ids, i, new_seqid);
  }
  if (!had_err) {
    GtStr *new_target = gt_str_new();
    gt_gff3_parser_build_target_str(new_target, target_ids, target_ranges,
                                    target_strands);
    gt_feature_node_set_attribute(fn, GT_GFF_TARGET, gt_str_get(new_target));
    gt_str_delete(new_target);
  }
  gt_str_delete(new_seqid);
  gt_str_delete(desc);
  gt_array_delete(target_strands);
  gt_array_delete(target_ranges);
  gt_str_array_delete(target_ids);
  return had_err;
}
Exemplo n.º 3
0
static GtOPrval parse_options(int *parsed_args,
                              Cmppairwiseopt *pw,
                              int argc, const char **argv, GtError *err)
{
  GtOptionParser *op;
  GtOption *optionstrings,
         *optionfiles,
         *optioncharlistlen,
         *optiontext,
         *optionshowedist;
  GtStrArray *charlistlen;
  GtOPrval oprval;

  gt_error_check(err);
  charlistlen = gt_str_array_new();
  pw->strings = gt_str_array_new();
  pw->files = gt_str_array_new();
  pw->text = gt_str_new();
  pw->charlistlen = NULL;
  pw->showedist = false;
  op = gt_option_parser_new("options", "Apply function to pairs of strings.");
  gt_option_parser_set_mail_address(op, "<*****@*****.**>");

  optionstrings = gt_option_new_string_array("ss", "use two strings",
                                             pw->strings);
  gt_option_parser_add_option(op, optionstrings);

  optionfiles = gt_option_new_filename_array("ff", "use two files",
                                             pw->files);
  gt_option_parser_add_option(op, optionfiles);

  optioncharlistlen = gt_option_new_string_array("a",
                                             "use character list and length",
                                             charlistlen);
  gt_option_parser_add_option(op, optioncharlistlen);

  optiontext = gt_option_new_string("t", "use text", pw->text, NULL);
  gt_option_parser_add_option(op, optiontext);

  optionshowedist = gt_option_new_bool("e", "output unit edit distance",
                      &pw->showedist, false);
  gt_option_parser_add_option(op, optionshowedist);

  gt_option_exclude(optionstrings, optionfiles);
  gt_option_exclude(optionstrings, optioncharlistlen);
  gt_option_exclude(optionstrings, optiontext);
  gt_option_exclude(optionfiles, optioncharlistlen);
  gt_option_exclude(optionfiles, optiontext);
  gt_option_exclude(optioncharlistlen, optiontext);
  gt_option_imply(optionshowedist, optionstrings);

  oprval = gt_option_parser_parse(op, parsed_args, argc, argv, gt_versionfunc,
                                  err);
  if (oprval == GT_OPTION_PARSER_OK)
  {
    if (gt_option_is_set(optionstrings))
    {
      if (gt_str_array_size(pw->strings) != 2UL)
      {
        gt_error_set(err, "option -ss requires two string arguments");
        oprval = GT_OPTION_PARSER_ERROR;
      }
    } else
    {
      if (gt_option_is_set(optionfiles))
      {
        if (gt_str_array_size(pw->files) != 2UL)
        {
          gt_error_set(err, "option -ff requires two filename arguments");
          oprval = GT_OPTION_PARSER_ERROR;
        }
      } else
      {
        if (gt_option_is_set(optioncharlistlen))
        {
          GtWord readint;

          if (gt_str_array_size(charlistlen) != 2UL)
          {
            gt_error_set(err,
                         "option -a requires charlist and length argument");
            oprval = GT_OPTION_PARSER_ERROR;
          }
          pw->charlistlen = gt_malloc(sizeof *pw->charlistlen);
          pw->charlistlen->charlist =
            gt_str_ref(gt_str_array_get_str(charlistlen,
                                                                  0));
          if (sscanf(gt_str_array_get(charlistlen,1UL), GT_WD, &readint) != 1 ||
              readint < 1L)
          {
            gt_error_set(err,
                         "option -a requires charlist and length argument");
            oprval = GT_OPTION_PARSER_ERROR;
          }
          pw->charlistlen->len = (GtUword) readint;
        } else
        {
          if (!gt_option_is_set(optiontext))
          {
            gt_error_set(err,
                         "use exactly one of the options -ss, -ff, -a, -t");
            oprval = GT_OPTION_PARSER_ERROR;
          }
        }
      }
    }
  }
  gt_option_parser_delete(op);
  if (oprval == GT_OPTION_PARSER_OK && *parsed_args != argc)
  {
    gt_error_set(err, "superfluous program parameters");
    oprval = GT_OPTION_PARSER_ERROR;
  }
  gt_str_array_delete(charlistlen);
  return oprval;
}
Exemplo n.º 4
0
int gt_paircmp(int argc, const char **argv, GtError *err)
{
  int parsed_args;
  Cmppairwiseopt cmppairwise;
  GtOPrval oprval;

  gt_error_check(err);

  oprval = parse_options(&parsed_args, &cmppairwise, argc, argv, err);
  if (oprval == GT_OPTION_PARSER_OK)
  {
    gt_assert(parsed_args == argc);
    showsimpleoptions(&cmppairwise);
    if (cmppairwise.showedist)
    {
      GtUword edist, len1, len2;
      GtStr *s1, *s2;

      gt_assert(gt_str_array_size(cmppairwise.strings) >= 2);
      s1 = gt_str_array_get_str(cmppairwise.strings,0);
      s2 = gt_str_array_get_str(cmppairwise.strings,1UL);
      len1 = gt_str_length(s1);
      len2 = gt_str_length(s2);
      edist = gt_computegreedyunitedist((const GtUchar *) gt_str_get(s1),
                                        len1,
                                        (const GtUchar *) gt_str_get(s2),
                                        len2);
      printf(GT_WU " " GT_WU " " GT_WU " " GT_WU "%% errors\n",
             edist, len1,len2,(200 * edist)/(len1+len2));
    }
    else if (cmppairwise.print)
    {
      gt_print_edist_alignment(
        (const GtUchar *) gt_str_array_get(cmppairwise.strings,0),0,
        (GtUword) strlen(gt_str_array_get(cmppairwise.strings,0)),
        (const GtUchar *) gt_str_array_get(cmppairwise.strings,1UL),0,
        (GtUword) strlen(gt_str_array_get(cmppairwise.strings,1UL)));
    } else
    {
      size_t idx;
      Checkfunctiontabentry checkfunction_tab[] = {
        MAKECheckfunctiontabentry(gt_checkgreedyunitedist),
        MAKECheckfunctiontabentry(gt_checklinearspace),
        MAKECheckfunctiontabentry(gt_checklinearspace_local),
        MAKECheckfunctiontabentry(gt_checkaffinelinearspace),
        MAKECheckfunctiontabentry(gt_checkaffinelinearspace_local)
      };
      for (idx = 0; idx < sizeof checkfunction_tab/sizeof checkfunction_tab[0];
           idx++)
      {
        GtUword testcases
          = applycheckfunctiontosimpleoptions(checkfunction_tab[idx].function,
                                              &cmppairwise);
        printf("# number of testcases for %s: " GT_WU "\n",
               checkfunction_tab[idx].name,testcases);
      }
    }
  }
  freesimpleoption(&cmppairwise);
  if (oprval == GT_OPTION_PARSER_REQUESTS_EXIT)
  {
    return 0;
  }
  if (oprval == GT_OPTION_PARSER_ERROR)
  {
    return -1;
  }
  return 0;
}
Exemplo n.º 5
0
GtHcrEncoder *gt_hcr_encoder_new(GtStrArray *files, GtAlphabet *alpha,
                                 bool descs, GtQualRange qrange, GtTimer *timer,
                                 GtError *err)
{
    GtBaseQualDistr *bqd;
    GtHcrEncoder *hcr_enc;
    GtSeqIterator *seqit;
    GtStrArray *file;
    int had_err = 0,
        status;
    GtUword len1,
            len2,
            i,
            num_of_reads = 0;
    const GtUchar *seq,
          *qual;
    char *desc;

    gt_error_check(err);
    gt_assert(alpha && files);

    if (timer != NULL)
        gt_timer_show_progress(timer, "get <base,qual> distr", stdout);

    if (qrange.start != GT_UNDEF_UINT)
        if (qrange.start == qrange.end) {
            gt_error_set(err, "qrange.start must unequal qrange.end");
            return NULL;
        }

    hcr_enc = gt_malloc(sizeof (GtHcrEncoder));
    hcr_enc->files = files;
    hcr_enc->num_of_files = gt_str_array_size(files);
    hcr_enc->num_of_reads = 0;
    hcr_enc->page_sampling = false;
    hcr_enc->regular_sampling = false;
    hcr_enc->sampling_rate = 0;
    hcr_enc->pagesize = gt_pagesize();
    if (descs) {
        hcr_enc->encdesc_encoder = gt_encdesc_encoder_new();
        if (timer != NULL)
            gt_encdesc_encoder_set_timer(hcr_enc->encdesc_encoder, timer);
    }
    else
        hcr_enc->encdesc_encoder = NULL;

    hcr_enc->seq_encoder = gt_malloc(sizeof (GtHcrSeqEncoder));
    hcr_enc->seq_encoder->alpha = alpha;
    hcr_enc->seq_encoder->sampling = NULL;
    hcr_enc->seq_encoder->fileinfos = gt_calloc((size_t) hcr_enc->num_of_files,
                                      sizeof (*(hcr_enc->seq_encoder->fileinfos)));
    hcr_enc->seq_encoder->qrange = qrange;
    bqd = hcr_base_qual_distr_new(alpha, qrange);

    /* check if reads in the same file are of same length and get
       <base, quality> pair distribution */
    for (i = 0; i < hcr_enc->num_of_files; i++) {
        file = gt_str_array_new();
        gt_str_array_add(file, gt_str_array_get_str(files, i));
        seqit = gt_seq_iterator_fastq_new(file, err);
        if (!seqit) {
            gt_error_set(err, "cannot initialize GtSeqIteratorFastQ object");
            had_err = -1;
        }
        if (!had_err) {
            gt_seq_iterator_set_symbolmap(seqit, gt_alphabet_symbolmap(alpha));
            gt_seq_iterator_set_quality_buffer(seqit, &qual);
            status = gt_seq_iterator_next(seqit, &seq, &len1, &desc, err);

            if (status == 1) {
                num_of_reads = 1UL;
                while (!had_err) {
                    status = gt_seq_iterator_next(seqit, &seq, &len2, &desc, err);
                    if (status == -1)
                        had_err = -1;
                    if (status != 1)
                        break;
                    if (len2 != len1) {
                        gt_error_set(err, "reads have to be of equal length");
                        had_err = -1;
                        break;
                    }
                    if (hcr_base_qual_distr_add(bqd, qual, seq, len1) != 0)
                        had_err = -1;
                    len1 = len2;
                    num_of_reads++;
                }
            }
            else if (status == -1)
                had_err = -1;

            if (!had_err) {
                if (i == 0)
                    hcr_enc->seq_encoder->fileinfos[i].readnum = num_of_reads;
                else
                    hcr_enc->seq_encoder->fileinfos[i].readnum =
                        hcr_enc->seq_encoder->fileinfos[i - 1].readnum + num_of_reads;
                hcr_enc->seq_encoder->fileinfos[i].readlength = len1;
            }
        }
        hcr_enc->num_of_reads += num_of_reads;
        gt_str_array_delete(file);
        gt_seq_iterator_delete(seqit);
    }
    if (!had_err)
        hcr_base_qual_distr_trim(bqd);

    if (!had_err) {
        if (timer != NULL)
            gt_timer_show_progress(timer, "build huffman tree for sequences and"
                                   " qualities", stdout);
        hcr_enc->seq_encoder->huffman =
            gt_huffman_new(bqd,
                           hcr_base_qual_distr_func,
                           (GtUword) bqd->ncols * bqd->nrows);
    }
    if (!had_err) {
        hcr_enc->seq_encoder->qual_offset = bqd->qual_offset;
        hcr_base_qual_distr_delete(bqd);
        return hcr_enc;
    }
    return NULL;
}
Exemplo n.º 6
0
static int gt_genomediff_runner(int argc, const char **argv,
                                int parsed_args, void *tool_arguments,
                                GtError *err)
{
    bool mirrored = false;
    int had_err = 0,
        i;
    GtEncseq              *encseq = NULL;
    GtGenomediffArguments *arguments = tool_arguments;
    GtLogger              *logger;
    GtShuUnitFileInfo     *unit_info = NULL;
    GtTimer               *timer = NULL;

    gt_error_check(err);
    gt_assert(arguments);

    logger = gt_logger_new(arguments->verbose,
                           GT_LOGGER_DEFLT_PREFIX,
                           stdout);
    gt_assert(logger);

    for (i = parsed_args; i < argc; i++) {
        gt_str_array_add_cstr(arguments->filenames, argv[i]);
    }

    if (gt_showtime_enabled()) {
        timer = gt_timer_new_with_progress_description("start");
        gt_timer_start(timer);
        gt_assert(timer);
    }

    if (arguments->with_units) {
        gt_logger_log(logger, "unitfile option set, filename is %s\n",
                      gt_str_get(arguments->unitfile));
    }

    if (timer != NULL)
        gt_timer_show_progress(timer, "start shu search", stdout);

    if (gt_str_array_size(arguments->filenames) > 1UL) {
        GtEncseqEncoder *ee = gt_encseq_encoder_new();
        gt_encseq_encoder_set_timer(ee, timer);
        gt_encseq_encoder_set_logger(ee, logger);
        /* kr only makes sense for dna, so we can check this already with ee */
        gt_encseq_encoder_set_input_dna(ee);
        had_err = gt_encseq_encoder_encode(ee, arguments->filenames,
                                           gt_str_get(arguments->indexname), err);
        gt_encseq_encoder_delete(ee);
    }
    else {
        gt_str_append_str(arguments->indexname,
                          gt_str_array_get_str(arguments->filenames, 0));
        if (arguments->with_esa || arguments->with_pck) {
            GtStr *current_line = gt_str_new();
            FILE *prj_fp;
            const char *buffer;
            char **elements = NULL;

            prj_fp = gt_fa_fopen_with_suffix(gt_str_get(arguments->indexname),
                                             GT_PROJECTFILESUFFIX,"rb",err);
            if (prj_fp == NULL)
                had_err = -1;
            while (!had_err && gt_str_read_next_line(current_line, prj_fp) != EOF) {
                buffer = gt_str_get(current_line);
                if (elements != NULL) {
                    gt_free(elements[0]);
                    gt_free(elements[1]);
                }
                gt_free(elements);
                elements = gt_cstr_split(buffer, '=');
                gt_log_log("%s", elements[0]);
                if (strcmp("mirrored", elements[0]) == 0) {
                    gt_log_log("%s", elements[1]);
                    if (strcmp("1", elements[1]) == 0) {
                        mirrored = true;
                        gt_log_log("sequences are treated as mirrored");
                    }
                }
                gt_str_reset(current_line);
            }
            gt_str_delete(current_line);
            if (elements != NULL) {
                gt_free(elements[0]);
                gt_free(elements[1]);
            }
            gt_free(elements);
            gt_fa_xfclose(prj_fp);
        }
    }

    if (!had_err) {
        GtEncseqLoader *el = gt_encseq_loader_new_from_options(arguments->loadopts,
                             err);
        if (mirrored)
            gt_encseq_loader_mirror(el);
        encseq =
            gt_encseq_loader_load(el, gt_str_get(arguments->indexname), err);
        gt_encseq_loader_delete(el);
    }
    if (encseq == NULL)
        had_err = -1;
    if (!had_err) {
        unit_info = gt_shu_unit_info_new(encseq);
        if (arguments->with_units)
            had_err = gt_shu_unit_file_info_read(arguments->unitfile, unit_info,
                                                 logger, err);
    }

    if (!had_err) {
        uint64_t **shusums = NULL;
        if (arguments->with_esa || arguments->with_pck) {
            shusums = gt_genomediff_shulen_sum(arguments, unit_info,
                                               logger, timer, err);
            if (shusums == NULL)
                had_err = -1;
        }
        else {
            const bool doesa = true;
            GenomediffInfo gd_info;
            Suffixeratoroptions sopts;
            sopts.beverbose = arguments->verbose;
            sopts.indexname = arguments->indexname;
            sopts.db = NULL;
            sopts.encopts = NULL;
            sopts.genomediff = true;
            sopts.inputindex = arguments->indexname;
            sopts.loadopts = arguments->loadopts;
            sopts.showprogress = false;
            sopts.idxopts = arguments->idxopts;

            gt_assert(unit_info != NULL);
            gt_array2dim_calloc(shusums, unit_info->num_of_genomes,
                                unit_info->num_of_genomes);
            gd_info.shulensums = shusums;
            gd_info.unit_info = unit_info;
            had_err = gt_runsuffixerator(doesa, &sopts, &gd_info, logger, err);
        }
        if (!had_err && shusums != NULL) {
            had_err = gt_genomediff_kr_calc(shusums, arguments, unit_info,
                                            arguments->with_pck, logger, timer, err);
            gt_array2dim_delete(shusums);
        }
    }

    if (timer != NULL) {
        gt_timer_show_progress_final(timer, stdout);
        gt_timer_delete(timer);
    }
    gt_logger_delete(logger);
    gt_encseq_delete(encseq);
    gt_shu_unit_info_delete(unit_info);

    return had_err;
}
Exemplo n.º 7
0
static int gt_linspace_align_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments,
                                    GtError *err)
{
  GtLinspaceArguments *arguments = tool_arguments;
  int had_err = 0;
  GtAlignment *align;
  GtWord left_dist = 0, right_dist = 0;
  GtSequenceTable *sequence_table1, *sequence_table2;
  GtLinspaceManagement *spacemanager;
  GtScoreHandler *scorehandler = NULL;
  GtTimer *linspacetimer = NULL;
  GtAlphabet *alphabet = NULL;

  gt_error_check(err);
  gt_assert(arguments);
  sequence_table1 = gt_sequence_table_new();
  sequence_table2 = gt_sequence_table_new();
  align = gt_alignment_new();
  spacemanager = gt_linspace_management_new();
  gt_linspace_management_set_TSfactor(spacemanager,arguments->timesquarefactor);

  /* get sequences */
  if (gt_str_array_size(arguments->strings) > 0)
  {
    get_onesequence(sequence_table1, arguments->strings, 0);
    sequence_table1->size++;
    get_onesequence(sequence_table2, arguments->strings, 1);
    sequence_table2->size++;
  }
  else if (gt_str_array_size(arguments->files) > 0)
  {
    had_err = get_fastasequences(sequence_table1,
                                 gt_str_array_get_str(arguments->files,0),err);
    if (!had_err)
    {
      had_err = get_fastasequences(sequence_table2,
                                  gt_str_array_get_str(arguments->files,1),err);
    }
  }
  if (arguments->dna)
  {
    alphabet = gt_alphabet_new_dna();
  } else
  {
    gt_assert(arguments->protein);
    alphabet = gt_alphabet_new_protein();
  }
  gt_encode_sequence_table(alphabet,sequence_table1);
  gt_encode_sequence_table(alphabet,sequence_table2);
  if (!had_err)
  {
    scorehandler = gt_arguments2scorehandler(arguments,err);
    if (scorehandler == NULL)
    {
      had_err = -1;
    } else
    {
      if (arguments->global && arguments->protein && !arguments->has_costmatrix)
      {
        GtScoreHandler *costhandler = gt_scorehandler2costhandler(scorehandler);
        gt_scorehandler_delete(scorehandler);
        scorehandler = costhandler;
      }
    }
  }
  /* get diagonal band */
  if (!had_err && arguments->diagonal)
  {
    if (gt_str_array_size(arguments->diagonalbonds) > 0)
    {
      had_err = gt_parse_score_value(__LINE__,&left_dist,
                                  gt_str_array_get(arguments->diagonalbonds,0),
                                  false, err);
      if (!had_err)
      {
        had_err = gt_parse_score_value(__LINE__,&right_dist,
                                  gt_str_array_get(arguments->diagonalbonds,1),
                                  false, err);
      }
    }
  }
  if (!had_err && arguments->spacetime)
  {
    linspacetimer = gt_timer_new();
  }

  /* alignment functions with linear gap costs */
  if (!had_err)
  {
    bool affine;

    if (gt_str_array_size(arguments->linearcosts) > 0)
    {
      affine = false;
    } else
    {
      gt_assert(gt_str_array_size(arguments->affinecosts) > 0);
      affine = true;
    }
    had_err = gt_all_against_all_alignment_check (
                            affine, align, arguments,
                            spacemanager,
                            scorehandler,
                            gt_alphabet_characters(alphabet),
                            gt_alphabet_wildcard_show(alphabet),
                            sequence_table1,
                            sequence_table2,
                            left_dist,
                            right_dist,
                            linspacetimer,err);
  }
  /*spacetime option*/
  if (!had_err && arguments->spacetime)
  {
    printf("# combined space peak in kilobytes: %f\n",
           GT_KILOBYTES(gt_linspace_management_get_spacepeak(spacemanager)));
    gt_timer_show_formatted(linspacetimer,"# TIME overall " GT_WD ".%02ld\n",
                            stdout);
  }
  gt_timer_delete(linspacetimer);
  gt_linspace_management_delete(spacemanager);
  gt_sequence_table_delete(sequence_table1);
  gt_sequence_table_delete(sequence_table2);
  gt_alignment_delete(align);
  gt_alphabet_delete(alphabet);
  gt_scorehandler_delete(scorehandler);
  return had_err;
}
Exemplo n.º 8
0
int gt_paircmp(int argc, const char **argv, GtError *err)
{
  int parsed_args;
  Cmppairwiseopt cmppairwise;
  GtOPrval oprval;
  GtFastaReader *reader0 = NULL,
                *reader1 = NULL;

  gt_error_check(err);

  oprval = parse_options(&parsed_args, &cmppairwise, argc, argv, err);
  if (oprval == GT_OPTION_PARSER_OK)
  {
    gt_assert(parsed_args == argc);
    showsimpleoptions(&cmppairwise);
    if (cmppairwise.showedist)
    {
      GtUword edist, len1, len2;
      GtStr *s1, *s2;

      gt_assert(gt_str_array_size(cmppairwise.strings) >= 2);
      s1 = gt_str_array_get_str(cmppairwise.strings,0);
      s2 = gt_str_array_get_str(cmppairwise.strings,1UL);
      len1 = gt_str_length(s1);
      len2 = gt_str_length(s2);
      edist = gt_computegreedyunitedist((const GtUchar *) gt_str_get(s1),
                                        len1,
                                        (const GtUchar *) gt_str_get(s2),
                                        len2);
      printf(GT_WU " " GT_WU " " GT_WU " " GT_WU "%% errors\n",
             edist, len1,len2,(200 * edist)/(len1+len2));
    }
    else if (cmppairwise.print)
    {
      const GtStr *str0 = gt_str_array_get_str(cmppairwise.strings,0),
                  *str1 = gt_str_array_get_str(cmppairwise.strings,1);

      gt_print_edist_alignment((const GtUchar *) gt_str_get(str0),0,
                               gt_str_length(str0),
                               (const GtUchar *) gt_str_get(str1),0,
                               gt_str_length(str1));
    } else
    {
      size_t idx;
      Checkfunctiontabentry checkfunction_tab[] = {
        MAKECheckfunctiontabentry(gt_checkgreedyunitedist),
        MAKECheckfunctiontabentry(gt_checklinearspace),
        MAKECheckfunctiontabentry(gt_checklinearspace_local),
        MAKECheckfunctiontabentry(gt_checkaffinelinearspace),
        MAKECheckfunctiontabentry(gt_checkaffinelinearspace_local),
        MAKECheckfunctiontabentry(gt_checkdiagonalbandalign),
        MAKECheckfunctiontabentry(gt_checkdiagonalbandaffinealign)
      };

      if (cmppairwise.fasta)
      {
        gt_assert(gt_str_array_size(cmppairwise.files) == 3);
        cmppairwise.fastasequences0 = gt_str_array_new();
        cmppairwise.fastasequences1 = gt_str_array_new();

        reader0 = gt_fasta_reader_rec_new(gt_str_array_get_str(
                                                        cmppairwise.files,1UL));
        gt_fasta_reader_run(reader0, NULL, save_fastaentry,
                            NULL, cmppairwise.fastasequences0, err);
        reader1 = gt_fasta_reader_rec_new (gt_str_array_get_str(
                                                        cmppairwise.files,2UL));
        gt_fasta_reader_run(reader1, NULL, save_fastaentry,
                            NULL, cmppairwise.fastasequences1, err);
        gt_error_check(err);
      }
      for (idx = 0; idx < sizeof checkfunction_tab/sizeof checkfunction_tab[0];
           idx++)
      {
        GtUword testcases;

        printf("run %s\n",checkfunction_tab[idx].name);
        testcases
          = applycheckfunctiontosimpleoptions(checkfunction_tab[idx].function,
                                              &cmppairwise);
        printf("# number of testcases for %s: " GT_WU "\n",
               checkfunction_tab[idx].name,testcases);
      }
      gt_fasta_reader_delete(reader0);
      gt_fasta_reader_delete(reader1);
    }
  }
  freesimpleoption(&cmppairwise);
  if (oprval == GT_OPTION_PARSER_REQUESTS_EXIT)
  {
    return 0;
  }
  if (oprval == GT_OPTION_PARSER_ERROR)
  {
    return -1;
  }
  return 0;
}
Exemplo n.º 9
0
static int gff3_in_stream_plain_next(GtNodeStream *ns, GtGenomeNode **gn,
                                     GtError *err)
{
  GtGFF3InStreamPlain *is = gff3_in_stream_plain_cast(ns);
  GtStr *filenamestr;
  int had_err = 0, status_code;

  gt_error_check(err);

  if (gt_queue_size(is->genome_node_buffer) > 1) {
    /* we still have at least two nodes in the buffer -> serve from there */
    *gn = gt_queue_get(is->genome_node_buffer);
    return 0;
  }

  /* the buffer is empty or has one element */
  gt_assert(gt_queue_size(is->genome_node_buffer) <= 1);

  for (;;) {
    /* open file if necessary */
    if (!is->file_is_open) {
      if (gt_str_array_size(is->files) &&
          is->next_file == gt_str_array_size(is->files)) {
        break;
      }
      if (gt_str_array_size(is->files)) {
        if (strcmp(gt_str_array_get(is->files, is->next_file), "-") == 0) {
          if (is->stdin_argument) {
            gt_error_set(err, "multiple specification of argument file \"-\"");
            had_err = -1;
            break;
          }
          is->fpin = gt_file_xopen(NULL, "r");
          is->file_is_open = true;
          is->stdin_argument = true;
        }
        else {
          is->fpin = gt_file_xopen(gt_str_array_get(is->files,
                                                       is->next_file), "r");
          is->file_is_open = true;
        }
        is->next_file++;
      }
      else {
        if (is->stdin_processed)
          break;
        is->fpin = NULL;
        is->file_is_open = true;
      }
      is->line_number = 0;

      if (!had_err && is->progress_bar) {
        printf("processing file \"%s\"\n", gt_str_array_size(is->files)
               ? gt_str_array_get(is->files, is->next_file-1) : "stdin");
      }
      if (!had_err && is->fpin && is->progress_bar) {
        gt_progressbar_start(&is->line_number,
                            gt_file_number_of_lines(gt_str_array_get(is->files,
                                                             is->next_file-1)));
      }
    }

    gt_assert(is->file_is_open);

    filenamestr = gt_str_array_size(is->files)
                  ? gt_str_array_get_str(is->files, is->next_file-1)
                  : is->stdinstr;
    /* read two nodes */
    had_err = gt_gff3_parser_parse_genome_nodes(is->gff3_parser, &status_code,
                                                is->genome_node_buffer,
                                                is->used_types, filenamestr,
                                                &is->line_number, is->fpin,
                                                err);
    if (had_err)
      break;
    if (status_code != EOF) {
      had_err = gt_gff3_parser_parse_genome_nodes(is->gff3_parser, &status_code,
                                                  is->genome_node_buffer,
                                                  is->used_types, filenamestr,
                                                  &is->line_number, is->fpin,
                                                  err);
      if (had_err)
        break;
    }

    if (status_code == EOF) {
      /* end of current file */
      if (is->progress_bar) gt_progressbar_stop();
      gt_file_delete(is->fpin);
      is->fpin = NULL;
      is->file_is_open = false;
      gt_gff3_parser_reset(is->gff3_parser);
      if (!gt_str_array_size(is->files)) {
        is->stdin_processed = true;
        break;
      }
      continue;
    }

    gt_assert(gt_queue_size(is->genome_node_buffer));

    /* make sure the parsed nodes are sorted */
    if (is->ensure_sorting && gt_queue_size(is->genome_node_buffer) > 1) {
      GtGenomeNode *last_node = NULL;
      /* a sorted stream can have at most one input file */
      gt_assert(gt_str_array_size(is->files) == 0 ||
                gt_str_array_size(is->files) == 1);
      had_err = gt_queue_iterate(is->genome_node_buffer, buffer_is_sorted,
                                 &last_node, err);
    }
    if (!had_err) {
      *gn = gt_queue_get(is->genome_node_buffer);
    }
    return had_err;
  }
  gt_assert(!gt_queue_size(is->genome_node_buffer));
  *gn = NULL;
  return had_err;
}