コード例 #1
0
static int extract_feature_visitor_feature_node(GtNodeVisitor *nv,
                                                GtFeatureNode *fn, GtError *err)
{
  GtExtractFeatureVisitor *efv;
  GtFeatureNodeIterator *fni;
  GtFeatureNode *child;
  GtStrArray *target_ids = NULL;
  GtStr *seqid = NULL,
        *description,
        *sequence;
  int had_err = 0;
  gt_error_check(err);
  efv = gt_extract_feature_visitor_cast(nv);
  gt_assert(efv->region_mapping);
  fni = gt_feature_node_iterator_new(fn);
  if (efv->target)
    target_ids = gt_str_array_new();
  if (efv->seqid)
    seqid = gt_str_new();
  description = gt_str_new();
  sequence = gt_str_new();
  while (!had_err && (child = gt_feature_node_iterator_next(fni))) {
    if (seqid)
      gt_str_reset(seqid);
    if (target_ids)
      gt_str_array_reset(target_ids);
    if (gt_extract_feature_sequence(sequence, (GtGenomeNode*) child, efv->type,
                                    efv->join, seqid, target_ids,
                                    efv->region_mapping, err)) {
      had_err = -1;
    }

    if (!had_err && gt_str_length(sequence)) {
      efv->fastaseq_counter++;
      construct_description(description, efv->type, efv->fastaseq_counter,
                            efv->join, efv->translate, seqid, target_ids);
      had_err = show_entry(description, sequence, efv->translate, efv->width,
                           efv->outfp);
      gt_str_reset(description);
      gt_str_reset(sequence);
    }

  }
  gt_str_delete(sequence);
  gt_str_delete(description);
  gt_str_delete(seqid);
  gt_str_array_delete(target_ids);
  gt_feature_node_iterator_delete(fni);
  return had_err;
}
コード例 #2
0
static int update_seq_col_if_necessary(GtRegionMapping *rm, GtStr *seqid,
                                       GtError *err)
{
  int had_err = 0;
  gt_error_check(err);
  gt_assert(rm && seqid);
  /* for mappings, we need to load the changed sequence, if needed... */
  if (rm->mapping) {
    if (!rm->sequence_file || (gt_str_cmp(rm->sequence_name, seqid))) {
      gt_str_delete(rm->sequence_file);
      /* ignore MD5 hashes when using region mappings */
      if (gt_md5_seqid_has_prefix(gt_str_get(seqid))) {
        rm->sequence_file = region_mapping_map(rm,
                                               gt_str_get(seqid)
                                                 +GT_MD5_SEQID_TOTAL_LEN,
                                               err);
      } else
        rm->sequence_file = region_mapping_map(rm, gt_str_get(seqid), err);
      if (!rm->sequence_file)
        had_err = -1;
      else {
        /* load new seqcol */
        if (!rm->sequence_filenames)
          rm->sequence_filenames = gt_str_array_new();
        else
          gt_str_array_reset(rm->sequence_filenames);
        gt_str_array_add(rm->sequence_filenames, rm->sequence_file);
        if (!rm->sequence_name)
          rm->sequence_name = gt_str_new();
        else
          gt_str_reset(rm->sequence_name);
        gt_str_append_str(rm->sequence_name, seqid);
        gt_seq_col_delete(rm->seq_col);
        rm->seq_col = gt_bioseq_col_new(rm->sequence_filenames, err);
        if (!rm->seq_col)
          had_err = -1;
      }
    }
  } else {
    /* ...otherwise, just make sure the seqcol is loaded */
    if (!rm->seq_col) {
      if (rm->encseq) {
        if (!(rm->seq_col = gt_encseq_col_new(rm->encseq, err)))
          had_err = -1;
      } else {
        gt_assert(rm->sequence_filenames);
        if (!(rm->seq_col = gt_bioseq_col_new(rm->sequence_filenames, err)))
          had_err = -1;
      }
    }
    if (!had_err && rm->usedesc) {
      if (rm->seqid2seqnum_mapping)
        gt_seqid2seqnum_mapping_delete(rm->seqid2seqnum_mapping);
      rm->seqid2seqnum_mapping =
                           gt_seqid2seqnum_mapping_new_seqcol(rm->seq_col, err);
      if (!rm->seqid2seqnum_mapping) {
        had_err = -1;
      }
    }
  }
  return had_err;
}
コード例 #3
0
static int gt_extract_feature_sequence_generic(GtStr *sequence,
                                GtGenomeNode *gn,
                                const char *type, bool join, GtStr *seqid,
                                GtStrArray *target_ids,
                                unsigned int *out_phase_offset,
                                GtRegionMapping *region_mapping, GtError *err)
{
  GtFeatureNode *fn;
  GtRange range;
  unsigned int phase_offset = 0;
  char *outsequence;
  const char *target;
  int had_err = 0;

  gt_error_check(err);
  fn = gt_genome_node_cast(gt_feature_node_class(), gn);
  gt_assert(fn);

  if (seqid)
    gt_str_append_str(seqid, gt_genome_node_get_seqid(gn));
  if (target_ids &&
      (target = gt_feature_node_get_attribute(fn, GT_GFF_TARGET))) {
    had_err = gt_gff3_parser_parse_all_target_attributes(target, false,
                                                         target_ids, NULL,
                                                         NULL, "", 0, err);
  }
  if (!had_err) {
    if (join) {
      GtFeatureNodeIterator *fni;
      GtFeatureNode *child;
      bool reverse_strand = false,
           first_child = true,
           first_child_of_type_seen = false;
      GtPhase phase = GT_PHASE_UNDEFINED;
      /* in this case we have to traverse the children */
      fni = gt_feature_node_iterator_new_direct(gt_feature_node_cast(gn));
      while (!had_err && (child = gt_feature_node_iterator_next(fni))) {
        if (first_child) {
          if (target_ids &&
               (target = gt_feature_node_get_attribute(child, GT_GFF_TARGET))) {
            gt_str_array_reset(target_ids);
            had_err = gt_gff3_parser_parse_all_target_attributes(target, false,
                                                                 target_ids,
                                                                 NULL,
                                                                 NULL, "", 0,
                                                                 err);
          }
          first_child = false;
        }
        if (!had_err) {
          if (extract_join_feature((GtGenomeNode*) child, type, region_mapping,
                                   sequence, &reverse_strand,
                                   &first_child_of_type_seen,
                                   &phase, err)) {
            had_err = -1;
          }
          if (phase != GT_PHASE_UNDEFINED) {
            phase_offset = (int) phase;
          }
        }
      }
      gt_feature_node_iterator_delete(fni);
      gt_assert(phase_offset <= (unsigned int) GT_PHASE_UNDEFINED);
      if (!had_err && gt_str_length(sequence)) {
        if (reverse_strand) {
          had_err = gt_reverse_complement(gt_str_get(sequence),
                                          gt_str_length(sequence), err);
        }
      }
    }
    else if (gt_feature_node_get_type(fn) == type) {
      GtPhase phase = gt_feature_node_get_phase(fn);
      gt_assert(!had_err);
      if (phase != GT_PHASE_UNDEFINED)
        phase_offset = (unsigned int) phase;
      /* otherwise we only have to look at this feature */
      range = gt_genome_node_get_range(gn);
      gt_assert(range.start); /* 1-based coordinates */
      had_err = gt_region_mapping_get_sequence(region_mapping, &outsequence,
                                               gt_genome_node_get_seqid(gn),
                                               range.start, range.end, err);
      if (!had_err) {
        gt_str_append_cstr_nt(sequence, outsequence, gt_range_length(&range));
        gt_free(outsequence);
        if (gt_feature_node_get_strand(fn) == GT_STRAND_REVERSE) {
          had_err = gt_reverse_complement(gt_str_get(sequence),
                                          gt_str_length(sequence), err);
        }
      }
    }
  }
  if (out_phase_offset && phase_offset != GT_PHASE_UNDEFINED) {
    *out_phase_offset = phase_offset;
  }
  return had_err;
}