Пример #1
0
/**
 * Load features of a single type from GFF3 files into an in-memory index.
 *
 * The files are read through an unsorted GFF3 input stream (with ID attribute
 * checking and tidy mode enabled), passed through a filter stream that is
 * given a one-entry type map containing `type`, and collected by a feature
 * out stream into a newly created in-memory feature index.
 *
 * @param numfiles   number of entries in `filenames`
 * @param filenames  names of the GFF3 files to read
 * @param type       feature type handed to the filter stream
 * @param logger     receives an error message if pulling the stream fails
 * @return the new feature index (created here and not released on success, so
 *         the caller is responsible for deleting it), or NULL if the logger
 *         reports an error once the stream has been processed
 */
GtFeatureIndex *agn_import_simple(int numfiles, const char **filenames,
                                  char *type, AgnLogger *logger)
{
  GtFeatureIndex *features = gt_feature_index_memory_new();

  GtNodeStream *gff3 = gt_gff3_in_stream_new_unsorted(numfiles, filenames);
  gt_gff3_in_stream_check_id_attributes((GtGFF3InStream *)gff3);
  gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream *)gff3);

  // The map uses the type string as both key and value; no free functions are
  // registered, so the map never releases the caller's string.
  GtHashmap *typestokeep = gt_hashmap_new(GT_HASH_STRING, NULL, NULL);
  gt_hashmap_add(typestokeep, type, type);
  GtNodeStream *filterstream = agn_filter_stream_new(gff3, typestokeep);

  GtNodeStream *featstream = gt_feature_out_stream_new(filterstream, features);

  GtError *error = gt_error_new();
  int result = gt_node_stream_pull(featstream, error);
  if(result == -1)
  {
    agn_logger_log_error(logger, "error processing node stream: %s",
                         gt_error_get(error));
  }
  gt_error_delete(error);

  if(agn_logger_has_error(logger))
  {
    gt_feature_index_delete(features);
    features = NULL;
  }
  gt_node_stream_delete(gff3);
  gt_node_stream_delete(filterstream);
  gt_node_stream_delete(featstream);

  // Release the type map created above; previously it was never freed.
  // agn_gaeval_visitor_new releases its own map the same way after passing it
  // to agn_filter_stream_new. It is deleted only after the streams are gone so
  // the filter stream never sees a dangling map.
  // NOTE(review): assumes the filter stream does not take sole ownership of
  // the map -- confirm against agn_filter_stream_new.
  gt_hashmap_delete(typestokeep);

  return features;
}
Пример #2
0
/**
 * Load GFF3 files into an in-memory feature index through a canon gene stream.
 *
 * Pipeline: unsorted GFF3 input stream (ID attribute checking and tidy mode
 * enabled) -> canon gene stream (which is also handed the logger) -> feature
 * out stream writing into a newly created in-memory feature index.
 *
 * @param numfiles   number of entries in `filenames`
 * @param filenames  names of the GFF3 files to read
 * @param logger     passed to the canon gene stream; also receives a message
 *                   if pulling the stream fails
 * @return the new feature index, or NULL if the logger reports an error once
 *         the stream has been processed
 */
GtFeatureIndex *agn_import_canonical(int numfiles, const char **filenames,
                                     AgnLogger *logger)
{
  GtNodeStream *instream, *canonstream, *outstream;
  GtFeatureIndex *index;
  GtError *err;

  // Input stream and its options
  instream = gt_gff3_in_stream_new_unsorted(numfiles, filenames);
  gt_gff3_in_stream_check_id_attributes((GtGFF3InStream *)instream);
  gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream *)instream);

  // Remaining pipeline stages, ending in the index that is returned
  index = gt_feature_index_memory_new();
  canonstream = agn_canon_gene_stream_new(instream, logger);
  outstream = gt_feature_out_stream_new(canonstream, index);

  // Drain the pipeline; a failure is reported through the logger
  err = gt_error_new();
  if(gt_node_stream_pull(outstream, err) == -1)
  {
    agn_logger_log_error(logger, "error processing node stream: %s",
                         gt_error_get(err));
  }
  gt_error_delete(err);

  // Any logged error invalidates the result
  if(agn_logger_has_error(logger))
  {
    gt_feature_index_delete(index);
    index = NULL;
  }

  gt_node_stream_delete(instream);
  gt_node_stream_delete(canonstream);
  gt_node_stream_delete(outstream);

  return index;
}
Пример #3
0
/**
 * Create a GAEVAL node visitor and load alignment features from `astream`.
 *
 * The visitor gets a new in-memory feature index (`alignments`), a NULL
 * `tsvout`, and a copy of `gparams`. A warning is printed to stderr when
 * alpha + beta + gamma + epsilon differs from 1.0 by more than 0.0001; the
 * visitor is still created in that case.
 *
 * `astream` is then wrapped in a filter stream (type map: cDNA_match,
 * EST_match, nucleotide_match), a feature out stream writing into the
 * visitor's alignment index, and three inter-feature streams (one per match
 * type, each with "match_gap"), and the whole chain is pulled once.
 *
 * @param astream  node stream supplying the alignments; must not be NULL
 * @param gparams  GAEVAL parameters stored in the visitor
 * @return the new visitor, or NULL if pulling the stream chain failed (the
 *         error is printed to stderr)
 */
GtNodeVisitor*
agn_gaeval_visitor_new(GtNodeStream *astream, AgnGaevalParams gparams)
{
  agn_assert(astream);

  // Create the node visitor
  GtNodeVisitor *nv = gt_node_visitor_create(gaeval_visitor_class());
  AgnGaevalVisitor *v = gaeval_visitor_cast(nv);
  v->alignments = gt_feature_index_memory_new();
  v->tsvout = NULL;
  v->params = gparams;

  // Check that sum of weights is 1.0
  double weights_total = gparams.alpha + gparams.beta +
                         gparams.gamma + gparams.epsilon;
  if(fabs(weights_total - 1.0) > 0.0001)
  {
    fprintf(stderr, "[AgnGaevalVisitor::agn_gaeval_visitor_new] warning: "
            "sum of weights is not 1.0 %.3lf; integrity calculations will be "
            "incorrect\n", weights_total);
  }


  // Set up node stream to load alignment features into memory. Every stream
  // created here is queued so that it can be released once the pull is done.
  GtQueue *streams = gt_queue_new();
  GtNodeStream *stream, *last_stream;
  GtHashmap *typestokeep = gt_hashmap_new(GT_HASH_STRING, NULL, NULL);
  gt_hashmap_add(typestokeep, "cDNA_match", "cDNA_match");
  gt_hashmap_add(typestokeep, "EST_match", "EST_match");
  gt_hashmap_add(typestokeep, "nucleotide_match", "nucleotide_match");
  stream = agn_filter_stream_new(astream, typestokeep);
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_feature_out_stream_new(last_stream, v->alignments);
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_inter_feature_stream_new(last_stream, "cDNA_match", "match_gap");
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_inter_feature_stream_new(last_stream, "EST_match", "match_gap");
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_inter_feature_stream_new(last_stream, "nucleotide_match",
                                       "match_gap");
  gt_queue_add(streams, stream);
  last_stream = stream;

  // Process the node stream
  GtError *error = gt_error_new();
  int result = gt_node_stream_pull(last_stream, error);
  if(result == -1)
  {
    fprintf(stderr, "[AEGeAn::AgnGaevalStream] error parsing alignments: %s\n",
            gt_error_get(error));
  }

  // Shared teardown for success and failure: the error object, the type map,
  // and all queued streams are temporary. Previously the failure path
  // returned without releasing any of them.
  gt_error_delete(error);
  gt_hashmap_delete(typestokeep);
  while(gt_queue_size(streams) > 0)
  {
    stream = gt_queue_get(streams);
    gt_node_stream_delete(stream);
  }
  gt_queue_delete(streams);

  // On failure, discard the visitor only after the streams that were handed
  // its alignment index have been released.
  if(result == -1)
  {
    gt_node_visitor_delete(nv);
    return NULL;
  }

  return nv;
}
Пример #4
0
/*
 * Unit test for the feature in stream.
 *
 * Two scenarios are run, each copying the test data index (`prefeat`) through
 * a feature in stream and a feature out stream into a fresh in-memory index
 * (`postfeat`):
 *
 *  1. default settings: expects 2 seqids, and expects both the range and the
 *     "orig" range of chr1 / scf0001 to be 500-75000 / 4000-9500;
 *  2. after gt_feature_in_stream_use_orig_ranges(): expects the same ranges,
 *     but "orig" ranges of 1-100000 / 1-10000.
 *
 * Returns 0 if all checks pass and -1 otherwise (with `error` set either by
 * the failing check or by gt_node_stream_pull). All objects created here are
 * released on every exit path.
 */
int gt_feature_in_stream_unit_test(GtError *error)
{
  GtNodeStream *src, *dest;
  GtFeatureIndex *prefeat, *postfeat;
  GtRange range1, range1test, range2, range2test;
  GtStrArray *seqids;
  int result;
  int had_err = -1; /* pessimistic; set to 0 only after every check passed */

  /* Scenario 1: default behavior */
  prefeat = in_stream_test_data(error);
  postfeat = gt_feature_index_memory_new();
  src = gt_feature_in_stream_new(prefeat);
  dest = gt_feature_out_stream_new(src, postfeat);
  result = gt_node_stream_pull(dest, error);
  if (result == -1)
  {
    goto cleanup;
  }

  seqids = gt_feature_index_get_seqids(postfeat, error);
  if (gt_str_array_size(seqids) != 2)
  {
    gt_error_set(error, "error in feature_in_stream unit test 1: expected 2 "
                 "seqids, found "GT_WU"", gt_str_array_size(seqids));
    gt_str_array_delete(seqids);
    goto cleanup;
  }
  gt_str_array_delete(seqids);

  range1test.start = 500;  range1test.end = 75000;
  range2test.start = 4000; range2test.end = 9500;
  gt_feature_index_get_range_for_seqid(postfeat, &range1, "chr1", error);
  gt_feature_index_get_range_for_seqid(postfeat, &range2, "scf0001",error);
  if (gt_range_compare(&range1, &range1test) ||
      gt_range_compare(&range2, &range2test))
  {
    gt_error_set(error, "error in feature_in_stream unit test 1: incorrect "
                 "sequence regions");
    goto cleanup;
  }
  gt_feature_index_get_orig_range_for_seqid(postfeat, &range1, "chr1", error);
  gt_feature_index_get_orig_range_for_seqid(postfeat, &range2, "scf0001",error);
  if (gt_range_compare(&range1, &range1test) ||
      gt_range_compare(&range2, &range2test))
  {
    gt_error_set(error, "error in feature_in_stream unit test 1: incorrect "
                 "sequence regions");
    goto cleanup;
  }

  /* Swap scenario 1's objects for scenario 2's. There is no exit between the
     deletes and the re-creation, so the cleanup label always sees live
     objects. */
  gt_feature_index_delete(prefeat);
  gt_feature_index_delete(postfeat);
  gt_node_stream_delete(src);
  gt_node_stream_delete(dest);

  /* Scenario 2: in stream configured to use the original ranges */
  prefeat = in_stream_test_data(error);
  postfeat = gt_feature_index_memory_new();
  src = gt_feature_in_stream_new(prefeat);
  dest = gt_feature_out_stream_new(src, postfeat);
  gt_feature_in_stream_use_orig_ranges((GtFeatureInStream *)src);
  result = gt_node_stream_pull(dest, error);
  if (result == -1)
  {
    goto cleanup;
  }

  range1test.start = 500;  range1test.end = 75000;
  range2test.start = 4000; range2test.end = 9500;
  gt_feature_index_get_range_for_seqid(postfeat, &range1, "chr1", error);
  gt_feature_index_get_range_for_seqid(postfeat, &range2, "scf0001",error);
  if (gt_range_compare(&range1, &range1test) ||
      gt_range_compare(&range2, &range2test))
  {
    gt_error_set(error, "error in feature_in_stream unit test 1: incorrect "
                 "sequence regions");
    goto cleanup;
  }
  range1test.start = 1; range1test.end = 100000;
  range2test.start = 1; range2test.end = 10000;
  gt_feature_index_get_orig_range_for_seqid(postfeat, &range1, "chr1", error);
  gt_feature_index_get_orig_range_for_seqid(postfeat, &range2, "scf0001",error);
  if (gt_range_compare(&range1, &range1test) ||
      gt_range_compare(&range2, &range2test))
  {
    gt_error_set(error, "error in feature_in_stream unit test 1: incorrect "
                 "sequence regions");
    goto cleanup;
  }

  had_err = 0;

cleanup:
  /* Single release point for whichever scenario's objects are current;
     previously every failing check returned without freeing them. */
  gt_feature_index_delete(prefeat);
  gt_feature_index_delete(postfeat);
  gt_node_stream_delete(src);
  gt_node_stream_delete(dest);

  return had_err;
}