Пример #1
0
/* Formats the position <pos> for short display in the ruler and stores the
   result in <txt>, writing at most <buflen> bytes (snprintf() semantics).

   Positions of at least 10^3, 10^6 or 10^9 are divided by that power of ten
   and printed with the suffix 'k', 'M' or 'G', respectively, followed by
   <unitstr>. The number of decimals is floor(log10(pos)) minus the number of
   trailing decimal zeros of <pos>. Smaller positions are printed unscaled,
   preceded by a single blank and followed by <unitstr>.

   <txt> must not be NULL. */
void gt_format_ruler_label(char *txt,  GtUword pos,
                           const char *unitstr, size_t buflen)
{
  GtUword divisor;
  double fpos;
  int precision;
  char prefix;
  gt_assert(txt);

  if (pos >= 1000000000)
  {
    divisor = 1000000000;
    prefix = 'G';
  }
  else if (pos >= 1000000)
  {
    divisor = 1000000;
    prefix = 'M';
  }
  else if (pos >= 1000)
  {
    divisor = 1000;
    prefix = 'k';
  }
  else
  {
    /* Small positions need no scaling. Returning here also guarantees that
       log10() is never evaluated for pos == 0, whose result (-infinity)
       cannot be converted to int. */
    /*@ignore@*/
    (void) snprintf(txt, buflen, " "GT_WU"%s", pos, unitstr);
    /*@end@*/
    return;
  }

  fpos = (double) pos / divisor;

  /* start with one decimal per digit after the leading one and drop one for
     every trailing zero of the position */
  precision = (int) floor(log10((double) pos));
  while (pos % 10 == 0)
  {
    pos /= 10;
    precision--;
  }

  /* the precision is passed as an argument, so the format string stays a
     literal and no temporary string has to be allocated */
  (void) snprintf(txt, buflen, "%.*f%c%s", precision, fpos, prefix, unitstr);
}
Пример #2
0
/* Appends the leading fields of <fn> to <outstr>, each one followed by a tab:
   sequence ID, source, type, start, end, score, strand and phase.
   An undefined score is written as '.', a defined one with "%.3g". */
void gt_gff3_output_leading_str(GtFeatureNode *fn, GtStr *outstr)
{
  GtGenomeNode *node;
  gt_assert(fn && outstr);
  node = (GtGenomeNode*) fn;

  /* sequence ID */
  gt_str_append_str(outstr, gt_genome_node_get_seqid(node));
  gt_str_append_char(outstr, '\t');

  /* source */
  gt_str_append_cstr(outstr, gt_feature_node_get_source(fn));
  gt_str_append_char(outstr, '\t');

  /* type */
  gt_str_append_cstr(outstr, gt_feature_node_get_type(fn));
  gt_str_append_char(outstr, '\t');

  /* start and end */
  gt_str_append_ulong(outstr, gt_genome_node_get_start(node));
  gt_str_append_char(outstr, '\t');
  gt_str_append_ulong(outstr, gt_genome_node_get_end(node));
  gt_str_append_char(outstr, '\t');

  /* score */
  if (!gt_feature_node_score_is_defined(fn))
    gt_str_append_char(outstr, '.');
  else {
    char scorebuf[BUFSIZ];
    (void) snprintf(scorebuf, BUFSIZ, "%.3g", gt_feature_node_get_score(fn));
    gt_str_append_cstr(outstr, scorebuf);
  }
  gt_str_append_char(outstr, '\t');

  /* strand */
  gt_str_append_char(outstr, GT_STRAND_CHARS[gt_feature_node_get_strand(fn)]);
  gt_str_append_char(outstr, '\t');

  /* phase */
  gt_str_append_char(outstr, GT_PHASE_CHARS[gt_feature_node_get_phase(fn)]);
  gt_str_append_char(outstr, '\t');
}
Пример #3
0
/* Visitor callback for the region node <rn>.

   After gff3_version_string() has been called for <nv>, a line consisting of
   GT_GFF_SEQUENCE_REGION, three blanks, the sequence ID, the start and the end
   of <rn> (blank-separated, newline-terminated) is emitted. The line goes to
   the visitor's output string if one is set, otherwise to its output file.

   <nv> and <rn> must not be NULL. Always returns 0; <err> is only checked. */
static int gff3_visitor_region_node(GtNodeVisitor *nv, GtRegionNode *rn,
                                    GT_UNUSED GtError *err)
{
  GtGFF3Visitor *gff3_visitor;
  GtGenomeNode *gn;
  gt_error_check(err);
  /* validate the arguments before the visitor is cast and used */
  gt_assert(nv && rn);
  gff3_visitor = gff3_visitor_cast(nv);
  gn = (GtGenomeNode*) rn;
  gff3_version_string(nv);
  if (!gff3_visitor->outstr) {
    gt_file_xprintf(gff3_visitor->outfp, "%s   %s "GT_WU" "GT_WU"\n",
                    GT_GFF_SEQUENCE_REGION,
                    gt_str_get(gt_genome_node_get_seqid(gn)),
                    gt_genome_node_get_start(gn),
                    gt_genome_node_get_end(gn));
  } else {
    /* same layout as the format string above, assembled piece by piece */
    gt_str_append_cstr(gff3_visitor->outstr, GT_GFF_SEQUENCE_REGION);
    gt_str_append_cstr(gff3_visitor->outstr, "   ");
    gt_str_append_cstr(gff3_visitor->outstr,
                       gt_str_get(gt_genome_node_get_seqid(gn)));
    gt_str_append_char(gff3_visitor->outstr, ' ');
    gt_str_append_ulong(gff3_visitor->outstr, gt_genome_node_get_start(gn));
    gt_str_append_char(gff3_visitor->outstr, ' ');
    gt_str_append_ulong(gff3_visitor->outstr, gt_genome_node_get_end(gn));
    gt_str_append_char(gff3_visitor->outstr, '\n');
  }
  return 0;
}
Пример #4
0
/* Creates sa->gff3_target_attribute, which must not be set yet.

   The attribute consists of the escaped reference ID, a start value, an end
   value and a strand character, separated by single blanks. If <md5ids> is
   true, the ID is preceded by GT_MD5_SEQID_PREFIX, the reference MD5 and
   a ':'. */
static void set_gff3_target_attribute(GthSA *sa, bool md5ids)
{
  GtStr *target;
  gt_assert(sa && !sa->gff3_target_attribute);
  target = gt_str_new();
  sa->gff3_target_attribute = target;

  /* optional MD5 prefix */
  if (md5ids) {
    gt_assert(sa->ref_md5);
    gt_str_append_cstr(target, GT_MD5_SEQID_PREFIX);
    gt_str_append_str(target, sa->ref_md5);
    gt_str_append_char(target, ':');
  }

  /* reference ID */
  gt_gff3_escape(target, gt_str_get(sa->ref_id), gt_str_length(sa->ref_id));
  gt_str_append_char(target, ' ');

  /* start; XXX: use reference dpstartpos */
  gt_str_append_ulong(target, gth_sa_referencecutoff_start(sa) + 1);
  gt_str_append_char(target, ' ');

  /* end; XXX */
  gt_str_append_ulong(target,
                      gth_sa_ref_total_length(sa) -
                      gth_sa_referencecutoff_end(sa));
  gt_str_append_char(target, ' ');

  /* strand */
  gt_str_append_char(target,
                     GT_STRAND_CHARS[sa->ref_strand_forward
                                     ? GT_STRAND_FORWARD
                                     : GT_STRAND_REVERSE]);
}
Пример #5
0
/* Fills the (initially empty) <description> with "<type>_<counter>",
   optionally followed by " (joined)" if <join> is set, " (translated)" if
   <translate> is set, " [seqid <seqid>]" if <seqid> is given, and
   " [target IDs <id>,<id>,...]" if <target_ids> is given and non-empty. */
static void construct_description(GtStr *description, const char *type,
                                  GtUword counter, bool join,
                                  bool translate, GtStr *seqid,
                                  GtStrArray *target_ids)
{
  gt_assert(!gt_str_length(description));

  /* mandatory part */
  gt_str_append_cstr(description, type);
  gt_str_append_char(description, '_');
  gt_str_append_ulong(description, counter);

  /* optional flags */
  if (join)
    gt_str_append_cstr(description, " (joined)");
  if (translate)
    gt_str_append_cstr(description, " (translated)");

  /* optional sequence ID */
  if (seqid) {
    gt_assert(gt_str_length(seqid));
    gt_str_append_cstr(description, " [seqid ");
    gt_str_append_str(description, seqid);
    gt_str_append_char(description, ']');
  }

  /* optional comma-separated list of target IDs */
  if (target_ids) {
    GtUword idx, num_of_ids = gt_str_array_size(target_ids);
    if (num_of_ids) {
      gt_str_append_cstr(description, " [target IDs ");
      for (idx = 0; idx < num_of_ids; idx++) {
        if (idx)
          gt_str_append_char(description, ',');
        gt_str_append_cstr(description, gt_str_array_get(target_ids, idx));
      }
      gt_str_append_char(description, ']');
    }
  }
}
Пример #6
0
/* Closes all currently open subset files.

   For every open subset file a message with its name and the number of
   stored alignments is passed to the showverbose callback (if one is set),
   the XML trailer is written and the file is closed. Afterwards the file
   slot, the filename slot and the counter of that subset are reset, so that
   the subset can be opened afresh later. */
static void close_output_files(Store_in_subset_file_data
                               *store_in_subset_file_data)
{
  Store_in_subset_file_data *d = store_in_subset_file_data;
  unsigned long i;
  GtStr *buf;

  buf = gt_str_new();
  for (i = 0; i < d->num_of_subset_files; i++) {
    if (!d->subset_files[i])
      continue; /* this subset has never been opened */

    /* an open file always has a name; check before the name is used */
    gt_assert(d->subset_filenames[i]);

    if (d->gthsplitinfo->showverbose) {
      gt_str_reset(buf);
      gt_str_append_cstr(buf, "split file created: ");
      gt_str_append_str(buf, d->subset_filenames[i]);
      gt_str_append_cstr(buf, " (size=");
      gt_str_append_ulong(buf, d->subset_file_sa_counter[i]);
      gt_str_append_cstr(buf, ")");
      d->gthsplitinfo->showverbose(gt_str_get(buf));
    }

    /* put XML trailer in file before closing it */
    gth_xml_show_trailer(true, d->subset_files[i]);
    gt_file_delete(d->subset_files[i]);
    gt_str_delete(d->subset_filenames[i]);

    /* Reset the complete slot. The filename has to be cleared as well:
       store_in_subset_file() asserts that a subset without an open file has
       no filename, and a stale pointer would refer to freed memory. */
    d->subset_files[i]           = NULL;
    d->subset_filenames[i]       = NULL;
    d->subset_file_sa_counter[i] = 0;
  }
  gt_str_delete(buf);
}
Пример #7
0
/* Attaches <singlehit> of the model <modelhit> as a protein match feature to
   the element currently processed by <lv>.

   A feature is only created if the hit has at least one chain or if
   lv->output_all_chains is set. It spans the range computed by
   gt_ltrdigest_pdom_visitor_coords(), carries references to the alignment and
   the amino acid sequence of the hit as user data, and gets the attributes
   "reading_frame", "name" (if the model has a name) and "chains" (if all
   chains are to be output and there is more than one).

   In any case the chain array of <singlehit> is deleted and set to NULL.
   Always returns 0. */
static int gt_ltrdigest_pdom_visitor_attach_hit(GtLTRdigestPdomVisitor *lv,
                                                GtHMMERModelHit *modelhit,
                                                GtHMMERSingleHit *singlehit)
{
  GtGenomeNode *gf;
  int had_err = 0;
  GtRange rrng;
  gt_assert(lv && singlehit);

  rrng = gt_ltrdigest_pdom_visitor_coords(lv, singlehit);

  if (gt_array_size(singlehit->chains) > 0 || lv->output_all_chains) {
    char buf[32];
    /* the model hit is dereferenced from here on */
    gt_assert(modelhit);
    gf = gt_feature_node_new(gt_genome_node_get_seqid((GtGenomeNode*)
                                                      lv->ltr_retrotrans),
                             gt_ft_protein_match,
                             rrng.start,
                             rrng.end,
                             singlehit->strand);
    /* the feature holds its own references to alignment and sequence */
    gt_genome_node_add_user_data((GtGenomeNode*) gf, "pdom_alignment",
                                 gt_str_ref(singlehit->alignment),
                                 (GtFree) gt_str_delete);
    gt_genome_node_add_user_data((GtGenomeNode*) gf, "pdom_aaseq",
                                 gt_str_ref(singlehit->aastring),
                                 (GtFree) gt_str_delete);
    gt_feature_node_set_source((GtFeatureNode*) gf, lv->tag);
    gt_feature_node_set_score((GtFeatureNode*) gf, (float) singlehit->evalue);
    (void) snprintf(buf, sizeof buf, "%d", (int) singlehit->frame);
    gt_feature_node_add_attribute((GtFeatureNode*) gf,
                                    "reading_frame", buf);
    if (modelhit->modelname != NULL) {
      gt_feature_node_add_attribute((GtFeatureNode*) gf, "name",
                                    modelhit->modelname);
    }
    if (gt_array_size(singlehit->chains) > 1UL && lv->output_all_chains) {
      GtStr *buffer;
      GtUword j, num_of_chains;
      gt_assert(singlehit->chains != NULL);
      num_of_chains = gt_array_size(singlehit->chains);
      buffer = gt_str_new();
      /* build "<model>:<chain>,<model>:<chain>,..."
         NOTE(review): the model name is used unconditionally here although it
         is checked for NULL above -- confirm it is always set at this point */
      for (j = 0UL; j < num_of_chains; j++) {
        gt_str_append_cstr(buffer, modelhit->modelname);
        gt_str_append_char(buffer, ':');
        gt_str_append_ulong(buffer,
                          *(GtUword*) gt_array_get(singlehit->chains, j));
        if (j != num_of_chains - 1) {
          gt_str_append_char(buffer, ',');
        }
      }
      gt_feature_node_set_attribute((GtFeatureNode*) gf, "chains",
                                    gt_str_get(buffer));
      gt_str_delete(buffer);
    }
    gt_feature_node_add_child(lv->ltr_retrotrans, (GtFeatureNode*) gf);
  }
  gt_array_delete(singlehit->chains);
  singlehit->chains = NULL;
  return had_err;
}
/* Decodes <amount> randomly chosen reads with <hcrd> and logs the number,
   description, sequence and qualities of each of them. Decoding stops at the
   first error, whose code is returned; 0 is returned on success.

   If <timer> is NULL a new timer is created and started, otherwise progress
   is shown on the given one.
   NOTE(review): the timer is deleted whenever gt_showtime_enabled() is false,
   regardless of who created it -- confirm this matches what callers expect. */
static int gt_compreads_decompress_benchmark(GtHcrDecoder *hcrd,
                                             unsigned long amount,
                                             GtTimer *timer,
                                             GtError *err) {
  char qual[BUFSIZ] = {0},
       seq[BUFSIZ] = {0};
  int had_err = 0;
  unsigned long readnum,
                last_read = gt_hcr_decoder_num_of_reads(hcrd) - 1,
                extracted;

  GtStr *timer_comment = gt_str_new_cstr("extracting ");
  GtStr *desc = gt_str_new();

  /* "extracting <amount> reads of <total>!" */
  gt_str_append_ulong(timer_comment, amount);
  gt_str_append_cstr(timer_comment, " reads of ");
  gt_str_append_ulong(timer_comment, last_read + 1);
  gt_str_append_cstr(timer_comment, "!");

  if (timer != NULL) {
    gt_timer_show_progress(timer, "extract random reads", stdout);
  }
  else {
    timer = gt_timer_new_with_progress_description("extract random reads");
    gt_timer_start(timer);
  }

  gt_log_log("%s",gt_str_get(timer_comment));

  /* nothing is decoded anymore once an error has occurred */
  for (extracted = 0; !had_err && extracted < amount; extracted++) {
    readnum = gt_rand_max(last_read);
    gt_log_log("get read: %lu", readnum);
    had_err = gt_hcr_decoder_decode(hcrd, readnum, seq, qual, desc, err);
    gt_log_log("%s",gt_str_get(desc));
    gt_log_log("%s",seq);
    gt_log_log("%s",qual);
  }

  gt_str_delete(timer_comment);
  gt_str_delete(desc);
  if (!gt_showtime_enabled())
    gt_timer_delete(timer);
  return had_err;
}
Пример #9
0
/* Test routine: NUMBER_OF_SYMBOLS times, a random number of at most MAX_SYMBOL
   is converted to a string and passed to gt_symbol(); the returned symbol
   has to equal that string. <data> is unused, NULL is returned. */
static void* test_symbol(GT_UNUSED void *data)
{
  GtUword round = 0;
  GtStr *numstr = gt_str_new();
  while (round < NUMBER_OF_SYMBOLS) {
    gt_str_reset(numstr);
    gt_str_append_ulong(numstr, gt_rand_max(MAX_SYMBOL));
    /* this call is made even if assertions are compiled out */
    gt_symbol(gt_str_get(numstr));
    gt_assert(!strcmp(gt_symbol(gt_str_get(numstr)), gt_str_get(numstr)));
    round++;
  }
  gt_str_delete(numstr);
  return NULL;
}
Пример #10
0
/* Translates <sequence> of the given <length> and shows the result.

   Every translated character is appended to the entry of <translations>
   which belongs to its frame. Afterwards each of the three frames with a
   non-empty translation is shown as FASTA entry with the header
   "<desc> (<frame number><strand>)" -- frame numbers start at 1, the strand
   is "-" if <rev> is set and "+" otherwise -- and its translation string is
   reset.

   Returns -1 if the translator reported an error, 0 otherwise. */
static int gt_seqtranslate_do_translation(GtTranslateArguments *arguments,
                                       const char *sequence,
                                       GtUword length,
                                       const char *desc,
                                       GtStr **translations,
                                       bool rev,
                                       GtError *err)
{
  GtTranslator *tr;
  GT_UNUSED GtTranslatorStatus trst;
  GtCodonIterator *ci;
  char translated;
  GtStr *header;
  unsigned int frame,
               i;

  /* collect the translated characters per frame */
  ci = gt_codon_iterator_simple_new(sequence, length, err);
  tr = gt_translator_new(ci);
  for (trst = gt_translator_next(tr, &translated, &frame, err);
       trst == GT_TRANSLATOR_OK;
       trst = gt_translator_next(tr, &translated, &frame, err)) {
    gt_str_append_char(translations[frame], translated);
  }
  gt_codon_iterator_delete(ci);
  gt_translator_delete(tr);
  if (trst == GT_TRANSLATOR_ERROR)
    return -1;

  /* output one FASTA entry per non-empty frame */
  header = gt_str_new();
  for (i = 0; i < 3; i++) {
    if (gt_str_length(translations[i]) == 0)
      continue;
    gt_str_append_cstr(header, desc);
    gt_str_append_cstr(header, " (");
    gt_str_append_ulong(header, i+1);
    gt_str_append_cstr(header, rev ? "-" : "+");
    gt_str_append_cstr(header, ")");
    gt_fasta_show_entry(gt_str_get(header), gt_str_get(translations[i]),
                        gt_str_length(translations[i]),
                        arguments->fasta_width, arguments->outfp);
    /* make both strings reusable */
    gt_str_reset(translations[i]);
    gt_str_reset(header);
  }
  gt_str_delete(header);
  return 0;
}
Пример #11
0
/* Creates a new ID for <fn> consisting of its type followed by the count
   which the visitor's ID counter holds for that type after it has been
   increased. The ID is stored in the visitor's map from feature nodes to
   unique ID strings under the key <fn> and returned. */
static GtStr* create_unique_id(GtGFF3Visitor *gff3_visitor, GtFeatureNode *fn)
{
  GtStr *unique_id;
  const char *fn_type;
  gt_assert(gff3_visitor && fn);

  fn_type = gt_feature_node_get_type(fn);

  /* count this occurrence of the type first ... */
  gt_string_distri_add(gff3_visitor->id_counter, fn_type);

  /* ... so that the new count becomes part of the ID */
  unique_id = gt_str_new_cstr(fn_type);
  gt_str_append_ulong(unique_id,
                      gt_string_distri_get(gff3_visitor->id_counter, fn_type));

  /* remember the ID for this node */
  gt_hashmap_add(gff3_visitor->feature_node_to_unique_id_str, fn, unique_id);

  return unique_id;
}
/* Classifies the effect which replacing the character at position
   <variant_pos> of the sequence stored for <mRNA> by <variant_char> has on
   the affected codon, and records the effect on <snp>.

   The effect is GT_SNP_SYNONYMOUS_STOP_EFFECT or GT_SNP_STOP_LOST_EFFECT if
   the reference codon is a stop codon, GT_SNP_NONSENSE_EFFECT if only the
   variant codon is one, and otherwise GT_SNP_SYNONYMOUS_AMINO_EFFECT or
   GT_SNP_MISSENSE_EFFECT, depending on whether both codons translate to the
   same amino acid.

   The string "<effect> <variant_idx> <mRNA type> <mRNA ID>" is stored in the
   GT_GVF_VARIANT_EFFECT attribute of <snp>; if the attribute exists already,
   the new entry is appended to its value, separated by a comma.

   <reference_char> exists only in builds with assertions and is only used to
   verify the reference sequence. Returns 0 on success, otherwise the error
   code of the failed codon translation. */
static int snp_annotator_classify_snp(GtSNPAnnotatorVisitor *sav,
                                      GtFeatureNode *mRNA,
                                      GtFeatureNode *snp,
                                      GtUword variant_pos,
                                      GtUword variant_idx,
                                      char variant_char,
#ifndef NDEBUG
                                      GT_UNUSED char reference_char,
#endif
                                      GT_UNUSED GtError *err)
{
  int had_err = 0;
  char *mrnaseq;
  const char *variant_effect = NULL;
  char codon[3],
       variant_codon[3];
  GtStr *effect_string;
  char oldamino,
       newamino;
  GT_UNUSED GtUword mrnalen;
  GtUword startpos,
          variantoffset;
  gt_assert(mRNA && snp && sav);
  gt_log_log("processing variant char %c for SNP %s\n",
               variant_char, gt_feature_node_get_attribute(snp, "Dbxref"));
  mrnaseq = gt_hashmap_get(sav->rnaseqs, mRNA);
  gt_assert(mrnaseq);
  if (!mrnaseq)
    return had_err; /* only reachable if assertions are compiled out */

  /* locate the affected codon and the offset of the variant within it */
  startpos = variant_pos / GT_CODON_LENGTH;
  variantoffset = variant_pos % GT_CODON_LENGTH;
  mrnalen = strlen(mrnaseq);
  gt_assert(variant_pos < mrnalen);
  /* NOTE(review): only variant_pos is checked against the sequence length;
     if the length is not a multiple of 3, reading the last codon may reach
     beyond the end -- confirm that callers guarantee complete codons */
  variant_codon[0] = codon[0] = mrnaseq[3*startpos];
  variant_codon[1] = codon[1] = mrnaseq[3*startpos+1];
  variant_codon[2] = codon[2] = mrnaseq[3*startpos+2];
  variant_codon[variantoffset] = variant_char;
#ifndef NDEBUG
  /* toupper() is only defined for values representable as unsigned char */
  gt_assert(toupper((unsigned char) codon[variantoffset]) ==
            toupper((unsigned char) reference_char));
#endif

  /* determine the effect */
  if (gt_trans_table_is_stop_codon(sav->tt, codon[0], codon[1], codon[2])) {
    if (gt_trans_table_is_stop_codon(sav->tt, variant_codon[0],
                                     variant_codon[1], variant_codon[2])) {
      variant_effect = gt_symbol(GT_SNP_SYNONYMOUS_STOP_EFFECT);
    } else {
      variant_effect = gt_symbol(GT_SNP_STOP_LOST_EFFECT);
    }
  } else {
    if (gt_trans_table_is_stop_codon(sav->tt, variant_codon[0],
                                     variant_codon[1], variant_codon[2])) {
      variant_effect = gt_symbol(GT_SNP_NONSENSE_EFFECT);
    } else {
      /* no stop codon involved: compare the encoded amino acids */
      had_err = gt_trans_table_translate_codon(sav->tt, codon[0], codon[1],
                                               codon[2], &oldamino, err);
      if (!had_err) {
        had_err = gt_trans_table_translate_codon(sav->tt, variant_codon[0],
                                                 variant_codon[1],
                                                 variant_codon[2],
                                                 &newamino, err);
      }
      if (!had_err) {
        if (newamino == oldamino) {
          variant_effect = gt_symbol(GT_SNP_SYNONYMOUS_AMINO_EFFECT);
        } else {
          variant_effect = gt_symbol(GT_SNP_MISSENSE_EFFECT);
        }
      }
    }
  }

  /* record the effect on the SNP */
  if (!had_err) {
    const char *var_attrib;
    gt_assert(variant_effect != NULL);
    if ((var_attrib = gt_feature_node_get_attribute(snp,
                                                    GT_GVF_VARIANT_EFFECT))) {
      /* extend the list of effects annotated so far */
      effect_string = gt_str_new_cstr(var_attrib);
      gt_str_append_cstr(effect_string, ",");
      gt_str_append_cstr(effect_string, variant_effect);
    } else {
      effect_string = gt_str_new_cstr(variant_effect);
    }
    gt_str_append_cstr(effect_string, " ");
    gt_str_append_ulong(effect_string, variant_idx);
    gt_str_append_cstr(effect_string, " ");
    gt_str_append_cstr(effect_string, gt_feature_node_get_type(mRNA));
    gt_str_append_cstr(effect_string, " ");
    gt_str_append_cstr(effect_string,
                       gt_feature_node_get_attribute(mRNA, GT_GFF_ID));
    gt_feature_node_set_attribute(snp, GT_GVF_VARIANT_EFFECT,
                                  gt_str_get(effect_string));
    gt_str_delete(effect_string);
  }

  return had_err;
}
Пример #13
0
/* Stores the spliced alignment <sa>, which stems from <outputfilename>, in
   the subset file it belongs to. <data> has to point to a
   Store_in_subset_file_data object.

   Alignments rejected by the filter are discarded. For all others the subset
   is determined from the alignment score or the coverage (depending on the
   split mode); the corresponding file is created on demand (including the
   XML leader), the alignment is written to it and the counter of the subset
   is increased. When <outputfilename> differs from the one seen in the
   previous call, all open subset files are closed first.

   <sa> is deleted in any case. Returns 0 on success and -1 if the subset
   file exists already and overwriting has not been forced, in which case
   <err> is set. */
static int store_in_subset_file(void *data, GthSA *sa,
                                const char *outputfilename, GtError *err)
{
  Store_in_subset_file_data *d = (Store_in_subset_file_data*) data;
  double split_determing_percentage = 0.0;
  unsigned long filenum;
  /* initialized, so that the suffix is a valid string even if the switch
     below falls through to the default case with assertions compiled out */
  char filenamesuffix[4] = "";
  int had_err = 0;

  gt_error_check(err);

  /* filter before we do any further processing */
  if (gth_sa_filter_filter_sa(d->sa_filter, sa)) {
    /* and free it afterwards */
    gth_sa_delete(sa);
    /* discard */
    return 0;
  }

  /* check whether we got a new output file to process */
  if (!d->current_outputfilename) {
    d->current_outputfilename = gt_cstr_dup(outputfilename);
  }
  else if (strcmp(d->current_outputfilename, outputfilename)) {
    /* close current output files */
    close_output_files(d);
    /* Remember the new name. Without the reassignment the pointer would
       keep referring to the freed string and be compared again in the next
       call. */
    gt_free(d->current_outputfilename);
    d->current_outputfilename = gt_cstr_dup(outputfilename);
  }

  /* determine in which file the current sa needs to be put */
  switch (d->gthsplitinfo->splitmode) {
    case ALIGNMENTSCORE_SPLIT:
      split_determing_percentage = gth_sa_score(sa);
      strcpy(filenamesuffix, "scr");
      break;
    case COVERAGE_SPLIT:
      split_determing_percentage = gth_sa_coverage(sa);
      strcpy(filenamesuffix, "cov");
      break;
    default: gt_assert(0);
  }
  gt_assert(split_determing_percentage >= 0.0);
  /* XXX: change into an assertion when coverage problem is fixed */
  if (split_determing_percentage > 1.0)
    split_determing_percentage = 1.0;

  /* a value of exactly 1.0 goes into the last file instead of opening
     another one */
  if (split_determing_percentage == 1.0)
    filenum = d->num_of_subset_files - 1;
  else {
    filenum = (unsigned long) floor(split_determing_percentage * 100.0 /
                                    d->gthsplitinfo->range);
  }
  gt_assert(filenum < d->num_of_subset_files);

  /* make sure the file exists and is open */
  if (!d->subset_files[filenum]) {
    gt_assert(d->subset_filenames[filenum] == NULL);
    /* file name: <prefix of outputfilename>.<suffix><from>-<to><mode suffix>
       where the prefix has the length gt_file_basename_length() returns */
    d->subset_filenames[filenum] = gt_str_new();
    gt_str_append_cstr_nt(d->subset_filenames[filenum], outputfilename,
                          gt_file_basename_length(outputfilename));
    gt_str_append_char(d->subset_filenames[filenum], '.');
    gt_str_append_cstr(d->subset_filenames[filenum], filenamesuffix);
    gt_str_append_ulong(d->subset_filenames[filenum],
                        filenum * d->gthsplitinfo->range);
    gt_str_append_char(d->subset_filenames[filenum], '-');
    gt_str_append_ulong(d->subset_filenames[filenum],
                        (filenum + 1) * d->gthsplitinfo->range);
    gt_str_append_cstr(d->subset_filenames[filenum],
                       gt_file_mode_suffix(d->gthsplitinfo->file_mode));

    /* if not disabled by -force, check if file already exists */
    if (!d->gthsplitinfo->force) {
      /* NOTE(review): if the file exists, the handle opened for reading
         stays in the subset file slot and is not closed here -- confirm
         that callers abort on error or release it */
      d->subset_files[filenum] =
        gt_file_open(d->gthsplitinfo->file_mode,
                     gt_str_get(d->subset_filenames[filenum]), "r", NULL);
      if (d->subset_files[filenum]) {
        gt_error_set(err, "file \"%s\" exists already. use option -%s to "
                     "overwrite", gt_str_get(d->subset_filenames[filenum]),
                     GT_FORCE_OPT_CSTR);
        had_err = -1;
      }
    }
    if (!had_err) {
      /* open split file for writing */
      d->subset_files[filenum] =
          gt_file_xopen_file_mode(d->gthsplitinfo->file_mode,
                                  gt_str_get(d->subset_filenames[filenum]),
                                  "w");
      /* store XML header in file */
      gth_xml_show_leader(true, d->subset_files[filenum]);
    }
  }

  /* put it there */
  if (!had_err) {
    gth_xml_inter_sa_visitor_set_outfp(d->sa_visitor,
                                       d->subset_files[filenum]);
    gth_sa_visitor_visit_sa(d->sa_visitor, sa);
  }

  /* adjust counter */
  if (!had_err)
    d->subset_file_sa_counter[filenum]++;

  /* and free it afterwards */
  gth_sa_delete(sa);

  return had_err;
}
Пример #14
0
static int split_fasta_file(const char *filename, unsigned long max_filesize,
                            bool force, GtError *err)
{
  GtFile *srcfp = NULL, *destfp = NULL;
  GtStr *destfilename = NULL;
  unsigned long filenum = 0, bytecount = 0, separator_pos;
  int read_bytes, had_err = 0;
  char buf[BUFSIZ];

  gt_error_check(err);
  gt_assert(filename && max_filesize);

  /* open source file */
  srcfp = gt_file_xopen(filename, "r");
  gt_assert(srcfp);

  /* read start characters */
  if ((read_bytes = gt_file_xread(srcfp, buf, BUFSIZ)) == 0) {
    gt_error_set(err, "file \"%s\" is empty", filename);
    had_err = -1;
  }
  bytecount += read_bytes;

  /* make sure the file is in fasta format */
  if (!had_err && buf[0] != '>') {
    gt_error_set(err, "file is not in FASTA format");
    had_err = -1;
  }

  if (!had_err) {
    /* open destination file */
    destfilename = gt_str_new();
    gt_str_append_cstr_nt(destfilename, filename,
                          gt_file_basename_length(filename));
    gt_str_append_char(destfilename, '.');
    gt_str_append_ulong(destfilename, ++filenum);
    gt_str_append_cstr(destfilename,
                       gt_file_mode_suffix(gt_file_mode(srcfp)));
    if (!(destfp = gt_outputfile_xopen_forcecheck(gt_str_get(destfilename), "w",
                                                  force, err))) {
      had_err = -1;
    }
    if (!had_err)
      gt_file_xwrite(destfp, buf, read_bytes);

    while (!had_err &&
           (read_bytes = gt_file_xread(srcfp, buf, BUFSIZ)) != 0) {
      if (bytecount + read_bytes > max_filesize) {
        int offset = bytecount < max_filesize ? max_filesize - bytecount : 0;
        if ((separator_pos = buf_contains_separator(buf, offset, read_bytes))) {
          separator_pos--;
          gt_assert(separator_pos < read_bytes);
          if (separator_pos)
            gt_file_xwrite(destfp, buf, separator_pos);
          /* close current file */
          gt_file_delete(destfp);
          /* open new file */
          gt_str_reset(destfilename);
          gt_str_append_cstr_nt(destfilename, filename,
                                gt_file_basename_length(filename));
          gt_str_append_char(destfilename, '.');
          gt_str_append_ulong(destfilename, ++filenum);
          gt_str_append_cstr(destfilename,
                             gt_file_mode_suffix(gt_file_mode(srcfp)));
          if (!(destfp =
                  gt_outputfile_xopen_forcecheck(gt_str_get(destfilename), "w",
                                                 force, err))) {
            had_err = -1;
            break;
          }
          bytecount = read_bytes - separator_pos; /* reset */
          gt_assert(buf[separator_pos] == '>');
          gt_file_xwrite(destfp, buf + separator_pos,
                         read_bytes - separator_pos);
          continue;
        }
      }
      bytecount += read_bytes;
      gt_file_xwrite(destfp, buf, read_bytes);
    }
  }

  /* free */
  gt_str_delete(destfilename);

  /* close current file */
  gt_file_delete(destfp);

  /* close source file */
  gt_file_delete(srcfp);

  return had_err;
}
Пример #15
0
/* Extends <current_id> in place by a dot and the decimal representation of
   <counter>, e.g. "name" becomes "name.1" for a counter of 1. */
static void make_unique_id_string(GtStr *current_id, GtUword counter)
{
  gt_str_append_char(current_id, '.');     /* separator */
  gt_str_append_ulong(current_id, counter); /* numeric suffix */
}
Пример #16
0
/* Makes sure that <sequence_regions> contains a region node covering the
   genomic sequence <seqnum> of file <filenum> and registers the sequence in
   the sequence ID store of <srf>.

   The range is the substring specified in <input>, if one is used, and the
   relative genomic range of the sequence otherwise. It is shifted by the
   start of the range parsed from the sequence description, if
   srf->use_desc_ranges is set and parsing succeeds, and by 1 otherwise.

   If <sequenceid> is empty, or if it has been used already and no
   description range was parsed, a new ID "<file basename>|<seqnum + 1>" is
   made up. Otherwise <sequenceid> is used: a new region is created if the ID
   is unknown so far, else the existing region is extended by joining its
   range with the new one. */
static void make_sequence_region(GtHashmap *sequence_regions,
                                 GtStr *sequenceid,
                                 GthRegionFactory *srf,
                                 GthInput *input,
                                 unsigned long filenum,
                                 unsigned long seqnum)
{
  /* true iff an offset could be parsed from the sequence description */
  bool offset_is_defined = false;
  GtRange range,
          descrange = { 0, 0 }; /* only meaningful if offset_is_defined */
  GtGenomeNode *sr = NULL;
  gt_assert(sequence_regions && sequenceid && srf && input);

  /* determine the range */
  if (gth_input_use_substring_spec(input)) {
    range.start = gth_input_genomic_substring_from(input);
    range.end   = gth_input_genomic_substring_to(input);
  }
  else {
    range = gth_input_get_relative_genomic_range(input, filenum, seqnum);
  }

  /* try to get an offset from the description */
  if (srf->use_desc_ranges) {
    GtStr *description = gt_str_new();
    gth_input_get_genomic_description(input, description, filenum, seqnum);
    if (!gt_parse_description_range(gt_str_get(description), &descrange))
      offset_is_defined = true;
    gt_str_delete(description);
  }
  if (offset_is_defined)
    range = gt_range_offset(&range, descrange.start);
  else
    range = gt_range_offset(&range, 1); /* 1-based */

  if (!gt_str_length(sequenceid) ||
      (gt_cstr_table_get(srf->used_seqids, gt_str_get(sequenceid)) &&
       !offset_is_defined)) {
    /* sequenceid is empty or exists already (and no offset has been parsed)
       -> make one up */
    GtStr *seqid;
    char *base;
    base = gt_basename(gth_input_get_genomic_filename(input, filenum));
    seqid = gt_str_new_cstr(base);
    gt_free(base);
    gt_str_append_char(seqid, '|');
    gt_str_append_ulong(seqid, seqnum + 1); /* 1-based */
    seqid_store_add(srf->seqid_store, filenum, seqnum, seqid, GT_UNDEF_ULONG);
    gt_assert(!gt_cstr_table_get(srf->used_seqids, gt_str_get(seqid)));
    gt_cstr_table_add(srf->used_seqids, gt_str_get(seqid));
    sr = gt_region_node_new(seqid_store_get(srf->seqid_store, filenum, seqnum),
                            range.start, range.end);
    /* the key is the string returned by the table of used IDs, not the
       local <seqid>, which is deleted below */
    gt_hashmap_add(sequence_regions,
                   (void*) gt_cstr_table_get(srf->used_seqids,
                                             gt_str_get(seqid)),
                   sr);
    gt_str_delete(seqid);
  }
  else {
    /* sequenceid does not exist already (or an offset has been parsed)
       -> use this one */
    if (!gt_cstr_table_get(srf->used_seqids, gt_str_get(sequenceid))) {
      /* no sequence region with this id exists -> create one */
      gt_cstr_table_add(srf->used_seqids, gt_str_get(sequenceid));
      seqid_store_add(srf->seqid_store, filenum, seqnum, sequenceid,
                      offset_is_defined ? descrange.start : GT_UNDEF_ULONG);
      sr = gt_region_node_new(seqid_store_get(srf->seqid_store, filenum,
                                              seqnum), range.start, range.end);
      gt_hashmap_add(sequence_regions,
                     (void*) gt_cstr_table_get(srf->used_seqids,
                                               gt_str_get(sequenceid)),
                     sr);
    }
    else {
      GtRange prev_range, new_range;
      /* sequence region with this id exists already -> modify range */
      sr = gt_hashmap_get(sequence_regions, gt_str_get(sequenceid));
      gt_assert(sr);
      prev_range = gt_genome_node_get_range(sr);
      new_range = gt_range_join(&prev_range, &range);
      gt_genome_node_set_range(sr, &new_range);
      seqid_store_add(srf->seqid_store, filenum, seqnum, sequenceid,
                      offset_is_defined ? descrange.start : GT_UNDEF_ULONG);
    }
  }
  gt_assert(sr);
}