Beispiel #1
0
/* Writes the <AGS_line> element for <ags> to <outfp>. The element carries the
   serial number <agsnum> + OUTPUTOFFSET and wraps an <exon_coordinates>
   element holding one <exon> entry per exon of <ags>. Every exon border is
   passed through SHOWGENPOSAGS() before it is printed. <indentlevel> is the
   indentation of the outermost tag; each nested tag is indented one level
   deeper. */
static void xml_outputAGSline(const GthAGS *ags, unsigned long agsnum,
                              unsigned int indentlevel, GtFile *outfp)
{
  const unsigned int coords_level = indentlevel + 1,
                     exon_level = indentlevel + 2;
  unsigned long exonnum = 0;

  /* opening tags */
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<AGS_line AGS_serial=\"%lu\">\n",
                  agsnum + OUTPUTOFFSET);
  gth_indent(outfp, coords_level);
  gt_file_xprintf(outfp, "<exon_coordinates>\n");

  /* one <exon> entry per exon */
  while (exonnum < gth_ags_num_of_exons(ags)) {
    GthExonAGS *current = gth_ags_get_exon(ags, exonnum);
    gth_indent(outfp, exon_level);
    gt_file_xprintf(outfp, "<exon e_start=\"%lu\" e_stop=\"%lu\"/>\n",
                    SHOWGENPOSAGS(current->range.start),
                    SHOWGENPOSAGS(current->range.end));
    exonnum++;
  }

  /* closing tags, innermost first */
  gth_indent(outfp, coords_level);
  gt_file_xprintf(outfp, "</exon_coordinates>\n");
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</AGS_line>\n");
}
Beispiel #2
0
/* Writes the complete <AGS_information> element for <ags>: the AGS line, the
   SCR line, the exon/intron lines, the PGS lines and finally the translation
   and ORF output. <pglnum> and <agsnum> are forwarded to the helpers that
   need them; all children are emitted one indentation level below
   <indentlevel>. */
static void xml_show_ags(const GthAGS *ags, unsigned long pglnum,
                         unsigned long agsnum, unsigned long translationtable,
                         GthInput *input, unsigned int indentlevel,
                         GthOutput *out)
{
  GtFile *outfp = out->outfp;
  const unsigned int childlevel = indentlevel + 1;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<AGS_information>\n");

  /* the children, in their fixed output order */
  xml_outputAGSline(ags, agsnum, childlevel, outfp);
  xml_outputSCRline(ags, childlevel, outfp);
  xml_output_exon_intron_lines(ags, childlevel, outfp);
  xml_outputPGSlines(ags->alignments, childlevel, outfp);

  /* 3-phase translation */
  gt_outputtranslationandorf(pglnum, ags, agsnum, translationtable, input,
                             childlevel, out);

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</AGS_information>\n");
}
Beispiel #3
0
/* Writes the <predicted_gene_location> element for <pgl>: a <PGL_line> entry
   with serial number (<pglnum> + OUTPUTOFFSET), strand and the borders of
   pgl->maxrange (converted with SHOWGENPOS()), followed by the output of
   xml_show_ags() for every AGS of <pgl>. */
static void xml_show_pgl(GthPGL *pgl, unsigned long pglnum,
                         unsigned long translationtable, GthInput *input,
                         unsigned int indentlevel, GthOutput *out)
{
  const unsigned int childlevel = indentlevel + 1;
  unsigned long agsnum, pgl_start, pgl_stop;

  /* borders of the location as they are shown to the user */
  pgl_start = SHOWGENPOS(gth_pgl_is_forward(pgl),
                         gth_pgl_total_length(pgl),
                         gth_pgl_genomic_offset(pgl),
                         pgl->maxrange.start);
  pgl_stop = SHOWGENPOS(gth_pgl_is_forward(pgl),
                        gth_pgl_total_length(pgl),
                        gth_pgl_genomic_offset(pgl),
                        pgl->maxrange.end);

  gth_indent(out->outfp, indentlevel);
  gt_file_xprintf(out->outfp, "<predicted_gene_location>\n");
  gth_indent(out->outfp, childlevel);
  gt_file_xprintf(out->outfp,
                  "<PGL_line PGL_serial=\"%lu\" PGL_strand=\"%c\" "
                  "PGL_start=\"%lu\" PGL_stop=\"%lu\"/>\n",
                  pglnum + OUTPUTOFFSET,
                  SHOWSTRAND(gth_pgl_is_forward(pgl)),
                  pgl_start, pgl_stop);

  /* all alternative gene structures of this location */
  agsnum = 0;
  while (agsnum < gth_pgl_num_of_ags(pgl)) {
    xml_show_ags(gth_pgl_get_ags(pgl, agsnum), pglnum, agsnum,
                 translationtable, input, childlevel, out);
    agsnum++;
  }

  gth_indent(out->outfp, indentlevel);
  gt_file_xprintf(out->outfp, "</predicted_gene_location>\n");
}
Beispiel #4
0
/* Prints a summary of the models contained in <bssm_param> to <outfp>. Every
   line starts with COMMENTCHAR. For each of the three site types (GT donor, GC
   donor, AG acceptor) the line shows whether the model is set and, if it is,
   whether it is a seven-class or a two-class model. */
void gth_bssm_param_show_info(const GthBSSMParam *bssm_param, GtFile *outfp)
{
#define SEVENCLASSSTRING        "seven-class"
#define TWOCLASSSTRING          "two-class"

/* Appends " (seven-class)" or " (two-class)" if the given model is set (chosen
   by comparing its hypothesis_num with HYPOTHESIS7) and always terminates the
   current line. */
#define PRINT_CLASS_STRING(MODEL) \
  do { \
    if (bssm_param->MODEL##_model_set) { \
      gt_file_xprintf(outfp, " (%s)", \
                      bssm_param->MODEL##_model.hypothesis_num == HYPOTHESIS7 \
                      ? SEVENCLASSSTRING : TWOCLASSSTRING); \
    } \
    gt_file_xfputc('\n', outfp); \
  } while (0)

  /* headline */
  gt_file_xprintf(outfp,
                  "%c the specified BSSM parameter file contains the following "
                  "models:\n", COMMENTCHAR);

  /* GT donor model */
  gt_file_xprintf(outfp, "%c GT donor sites   = %s", COMMENTCHAR,
                  GTH_SHOWBOOL(bssm_param->gt_donor_model_set));
  PRINT_CLASS_STRING(gt_donor);

  /* GC donor model */
  gt_file_xprintf(outfp, "%c GC donor sites   = %s", COMMENTCHAR,
                  GTH_SHOWBOOL(bssm_param->gc_donor_model_set));
  PRINT_CLASS_STRING(gc_donor);

  /* AG acceptor model */
  gt_file_xprintf(outfp, "%c AG acceptor sites= %s", COMMENTCHAR,
                  GTH_SHOWBOOL(bssm_param->ag_acceptor_model_set));
  PRINT_CLASS_STRING(ag_acceptor);
}
/* Shows the header of a run. With XML output enabled the work is delegated to
   show_xml_run_header(); with GFF3 output enabled nothing is shown; otherwise
   three comment lines (program version and build, date of the run, arguments)
   are written to the output file of <call_info>. */
void gth_run_header_show(GthCallInfo *call_info, GthInput *input,
                         const char *gth_version, unsigned int indentlevel,
                         const char **args)
{
  GtFile *outfp = call_info->out->outfp;
  char *run_time = gth_get_time(); /* released at the end of this function */

  if (call_info->out->xmlout) {
    /* XML header */
    show_xml_run_header(call_info, input, run_time, gth_version, indentlevel,
                        args);
  }
  else {
    if (!call_info->out->gff3out) {
      /* plain text header */
      gt_file_xprintf(outfp, "%c GenomeThreader %s (%s)\n", COMMENTCHAR,
                      gth_version, GT_BUILT);
      gt_file_xprintf(outfp, "%c Date run: %s\n", COMMENTCHAR, run_time);
      gt_file_xprintf(outfp, "%c Arguments: ", COMMENTCHAR);
      gt_cstr_array_show_genfile(args, outfp);
    }
  }

  gt_free(run_time);
}
/* Prints the reference sequence line of <sa> to <outfp>: the kind of
   sequence (EST or protein, depending on the alphabet type of <sa>), the
   reference file name, the strand (EST only), the reference description and,
   if <showseqnums> is set, the sequence number. The line is followed by an
   empty line. */
static void showgthreferenceinformation(GthSA *sa, GthInput *input,
                                        bool showseqnums,
                                        GtFile *outfp)
{
  gt_assert(gth_sa_ref_file_num(sa) != GT_UNDEF_UWORD);

  /* the leading part depends on the alphabet of the reference */
  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA: {
      const char *ref_filename =
        gth_input_get_reference_filename(input, gth_sa_ref_file_num(sa));
      gt_file_xprintf(outfp,
                      "EST Sequence: file=%s, strand=%c, description=",
                      ref_filename, gth_sa_ref_strand_char(sa));
      break;
    }
    case PROTEIN_ALPHA: {
      const char *ref_filename =
        gth_input_get_reference_filename(input, gth_sa_ref_file_num(sa));
      gt_file_xprintf(outfp, "Protein Sequence: file=%s, description=",
                      ref_filename);
      break;
    }
    default:
      gt_assert(0);
  }

  gth_sa_echo_reference_description(sa, input, outfp);

  if (showseqnums) {
    gt_file_xprintf(outfp, ", seqnum=" GT_WU "", gth_sa_ref_seq_num(sa));
  }

  /* end of line plus one empty line */
  gt_file_xfputc('\n', outfp);
  gt_file_xfputc('\n', outfp);
}
Beispiel #7
0
/* Writes the closing tag of the XML document to <outfp>: </SplicedAlignment>
   for intermediate output, </GTH_output> otherwise. */
void gth_xml_show_trailer(bool intermediate, GtFile *outfp)
{
  const char *closing_tag = intermediate ? "</SplicedAlignment>\n"
                                         : "</GTH_output>\n";
  gt_file_xprintf(outfp, "%s", closing_tag);
}
Beispiel #8
0
/* Writes the <SCR_line> element for <ags> to <outfp>, starting at
   <indentlevel>. Every exon except the last one yields an <exon-intron> entry
   with the donor and acceptor site probabilities stored at the same index in
   ags->splicesiteprobs plus the exon score; the last exon yields an
   <exon-only> entry with its score.

   An <ags> without exons produces an empty <SCR_line> element. Previously the
   loop bound "gt_array_size(ags->exons) - 1" wrapped around for an empty
   array (the size is unsigned), which led to out-of-bounds array accesses. */
static void xml_outputSCRline(const GthAGS *ags, unsigned int indentlevel,
                              GtFile *outfp)
{
  GthSpliceSiteProb *splicesiteprob;
  const unsigned long num_exons = gt_array_size(ags->exons);
  unsigned long i;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<SCR_line>\n");
  indentlevel++;

  /* "i + 1 < num_exons" cannot wrap around, unlike "i < num_exons - 1" */
  for (i = 0; i + 1 < num_exons; i++) {
    splicesiteprob = (GthSpliceSiteProb*) gt_array_get(ags->splicesiteprobs, i);
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<exon-intron don_prob=\"%.3f\" "
                       "acc_prob=\"%.3f\" e_score=\"%.3f\"/>\n",
                       splicesiteprob->donorsiteprob,
                       splicesiteprob->acceptorsiteprob,
                       ((GthExonAGS*) gt_array_get(ags->exons, i))->score);
  }

  /* the last exon has no following intron */
  if (num_exons > 0) {
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<exon-only e_score=\"%.3f\"/>\n",
                    ((GthExonAGS*)
                     gt_array_get(ags->exons, num_exons - 1))->score);
  }

  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</SCR_line>\n");
}
Beispiel #9
0
/* Writes the opening <reference ...> tag for <sa> to <outfp> at
   <indentlevel>. The attributes are the reference file name, the reference
   id, the strand (only for a DNA reference) and the reference description,
   which is echoed by gth_input_echo_reference_description(). The element is
   left open; this function does not write the closing tag. */
static void xml_showgthreferenceinformation(GthSA *sa,
                                            GthInput *input,
                                            unsigned int indentlevel,
                                            GtFile *outfp)
{
  gt_assert(gth_sa_ref_file_num(sa) != GT_UNDEF_ULONG);

  gth_indent(outfp, indentlevel);

  /* attributes up to the opening quote of ref_description */
  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA: {
      const char *ref_filename =
        gth_input_get_reference_filename(input, gth_sa_ref_file_num(sa));
      gt_file_xprintf(outfp, "<reference ref_file=\"%s\" ref_id=\"%s\" "
                      "ref_strand=\"%c\" ref_description=\"",
                      ref_filename, gth_sa_ref_id(sa),
                      gth_sa_ref_strand_char(sa));
      break;
    }
    case PROTEIN_ALPHA: {
      const char *ref_filename =
        gth_input_get_reference_filename(input, gth_sa_ref_file_num(sa));
      gt_file_xprintf(outfp, "<reference ref_file=\"%s\" ref_id=\"%s\" "
                      "ref_description=\"",
                      ref_filename, gth_sa_ref_id(sa));
      break;
    }
    default:
      gt_assert(0);
  }

  /* value of ref_description */
  gth_input_echo_reference_description(input, gth_sa_ref_file_num(sa),
                                       gth_sa_ref_seq_num(sa), outfp);

  /* closing quote and end of the opening tag */
  gt_file_xprintf(outfp, "\">\n");
}
/* Shows the predicted gene location <pgl> with serial number
   <pglnum> + OUTPUTOFFSET, either as a <predicted_gene_location> XML element
   (if out->xmlout is set) or as a plain text "PGL" line. In both cases the
   line contains the strand and the borders of pgl->maxrange converted with
   SHOWGENPOS(); the text variant additionally names the genomic template if
   out->pglgentemplate is set. Afterwards show_ags() is called for every
   assembly of <pgl>. Must not be called for GFF3 output. */
static void show_pgl(GthPGL *pgl, GtUword pglnum,
                     GtUword translationtable, GthInput *input,
                     unsigned int indentlevel, GthOutput *out)
{
  GtFile *outfp = out->outfp;
  GtUword agsnum, pgl_start, pgl_stop;
  char strand;

  gt_assert(!out->gff3out);

  /* the values shown are the same for both output formats */
  strand = SHOWSTRAND(gth_pgl_is_forward(pgl));
  pgl_start = SHOWGENPOS(gth_pgl_is_forward(pgl),
                         gth_pgl_total_length(pgl),
                         gth_pgl_genomic_offset(pgl),
                         pgl->maxrange.start);
  pgl_stop = SHOWGENPOS(gth_pgl_is_forward(pgl),
                        gth_pgl_total_length(pgl),
                        gth_pgl_genomic_offset(pgl),
                        pgl->maxrange.end);

  if (!out->xmlout) {
    /* plain text line */
    gt_file_xprintf(outfp, "PGL %3" GT_WUS " (%c strand):      " GT_WU "     "
                    GT_WU,
                    pglnum + OUTPUTOFFSET, strand, pgl_start, pgl_stop);
    if (out->pglgentemplate)
      gt_file_xprintf(outfp, " (genomic template '%s')", gth_pgl_gen_id(pgl));
    gt_file_xfputc('\n', outfp);
  }
  else {
    /* opening tag and PGL line; the assemblies are nested one level deeper */
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<predicted_gene_location>\n");
    indentlevel++;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<PGL_line PGL_serial=\"" GT_WU "\" "
                    "PGL_strand=\"%c\" PGL_start=\"" GT_WU "\" PGL_stop=\""
                    GT_WU "\"/>\n",
                    pglnum + OUTPUTOFFSET, strand, pgl_start, pgl_stop);
  }

  for (agsnum = 0; agsnum < gt_array_size(pgl->assemblies); agsnum++) {
    show_ags(gth_pgl_get_ags(pgl, agsnum), pglnum, agsnum, translationtable,
             input, indentlevel, out);
  }

  if (out->xmlout) {
    indentlevel--;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "</predicted_gene_location>\n");
  }
}
/* Writes the preface of the PGL section to the output file of the visitor: a
   delimiter line made of DELIMITERLINELENGTH times PGLS_DELIMITERCHAR, an
   empty line and the headline containing <num_of_pgls>, followed by two empty
   lines. */
static void txt_pgl_visitor_preface(GthPGLVisitor *pgl_visitor,
                                    GtUword num_of_pgls)
{
  GthTxtPGLVisitor *visitor = txt_pgl_visitor_cast(pgl_visitor);
  GtUword remaining;

  /* delimiter line */
  for (remaining = DELIMITERLINELENGTH; remaining > 0; remaining--) {
    gt_file_xfputc(PGLS_DELIMITERCHAR, visitor->out->outfp);
  }
  gt_file_xprintf(visitor->out->outfp, "\n\n");

  /* headline */
  gt_file_xprintf(visitor->out->outfp,
                  "Predicted gene locations (" GT_WU "):\n\n\n", num_of_pgls);
}
Beispiel #12
0
/* Writes the trailer of the alignment module: the
   <total_number_ESTs_reported> element containing <num_of_sas> and the closing
   </alignment_module> tag, both one level deeper than the current indentation
   level of the visitor. The indentation level of the visitor is unchanged
   when the function returns. */
static void  xml_final_sa_visitor_trailer(GthSAVisitor *sa_visitor,
                                          unsigned long num_of_sas)
{
  GthXMLFinalSAVisitor *visitor = xml_final_sa_visitor_cast(sa_visitor);

  visitor->indentlevel += 1;

  gth_indent(visitor->outfp, visitor->indentlevel);
  gt_file_xprintf(visitor->outfp,
                  "<total_number_ESTs_reported>%lu</total_number_ESTs_reported>"
                  "\n", num_of_sas);

  gth_indent(visitor->outfp, visitor->indentlevel);
  gt_file_xprintf(visitor->outfp, "</alignment_module>\n");

  visitor->indentlevel -= 1;
}
/* Shows the GFF3 line of feature node <fn>; <data> is the GtGFF3Visitor doing
   the output. After the leading columns the attribute column is assembled from
   the unique ID stored for <fn> (if any), the list of parent IDs stored for
   <fn> (if any) and the attributes reported by show_attribute(). If no
   attribute was shown a single '.' is written. Always returns 0. */
static int gff3_show_feature_node(GtFeatureNode *fn, void *data,
                                  GT_UNUSED GtError *err)
{
  GtGFF3Visitor *gff3_visitor = (GtGFF3Visitor*) data;
  GtArray *parents = NULL;
  ShowAttributeInfo attr_info;
  GtStr *unique_id;
  GtUword idx, num_parents;
  bool anything_shown = false;

  gt_error_check(err);
  gt_assert(fn && gff3_visitor);

  /* leading columns */
  gt_gff3_output_leading(fn, gff3_visitor->outfp);

  /* ID attribute */
  unique_id = gt_hashmap_get(gff3_visitor->feature_node_to_unique_id_str, fn);
  if (unique_id) {
    gt_file_xprintf(gff3_visitor->outfp, "%s=%s", GT_GFF_ID,
                    gt_str_get(unique_id));
    anything_shown = true;
  }

  /* Parent attribute: comma separated list of parent IDs */
  parents = gt_hashmap_get(gff3_visitor->feature_node_to_id_array, fn);
  num_parents = gt_array_size(parents);
  if (num_parents) {
    if (anything_shown) {
      gt_file_xfputc(';', gff3_visitor->outfp);
    }
    gt_file_xprintf(gff3_visitor->outfp, "%s=", GT_GFF_PARENT);
    for (idx = 0; idx < num_parents; idx++) {
      if (idx > 0) {
        gt_file_xfputc(',', gff3_visitor->outfp);
      }
      gt_file_xprintf(gff3_visitor->outfp, "%s",
                      *(char**) gt_array_get(parents, idx));
    }
    anything_shown = true;
  }

  /* remaining attributes; the callback shares <anything_shown> */
  attr_info.attribute_shown = &anything_shown;
  attr_info.outfp = gff3_visitor->outfp;
  gt_feature_node_foreach_attribute(fn, show_attribute, &attr_info);

  /* placeholder for an empty attribute column */
  if (!anything_shown) {
    gt_file_xfputc('.', gff3_visitor->outfp);
  }

  gt_file_xfputc('\n', gff3_visitor->outfp);

  return 0;
}
Beispiel #14
0
/* Replaces chain->forwardranges by the merged set of all fragment ranges
   (startpos2 .. endpos2 of each of the <num_of_fragments> <fragments>) that
   overlap the genomic range of <chain>. The selected ranges are sorted with
   gt_range_compare() and ranges whose start is not greater than the end of the
   previously stored range are merged into it. If <comments> is set, the
   forward ranges before the enrichment are shown on <outfp>. At least one
   fragment must overlap the genomic range. */
static void enrich_chain(GthChain *chain, GtFragment *fragments,
                         unsigned long num_of_fragments, bool comments,
                         GtFile *outfp)
{
  GtRange dp_range, candidate;
  GtArray *enrichment;
  unsigned long idx;

  gt_assert(chain && fragments && num_of_fragments);

  if (comments) {
    gt_file_xprintf(outfp, "%c enrich global chain with the following "
                    "forward ranges:\n", COMMENTCHAR);
    gt_file_xprintf(outfp, "%c ", COMMENTCHAR);
    gt_ranges_show(chain->forwardranges, outfp);
  }

  /* collect every fragment range which overlaps the genomic range of the
     chain */
  dp_range = chain_get_genomicrange(chain);
  enrichment = gt_array_new(sizeof (GtRange));
  for (idx = 0; idx < num_of_fragments; idx++) {
    candidate.start = fragments[idx].startpos2;
    candidate.end = fragments[idx].endpos2;
    if (gt_range_overlap(&dp_range, &candidate)) {
      gt_array_add(enrichment, candidate);
    }
  }
  gt_assert(gt_array_size(enrichment));

  qsort(gt_array_get_space(enrichment), gt_array_size(enrichment),
        sizeof (GtRange), (GtCompare) gt_range_compare);

  /* rebuild the forward ranges from the sorted enrichment */
  gt_array_reset(chain->forwardranges);
  candidate = *(GtRange*) gt_array_get_first(enrichment);
  gt_array_add(chain->forwardranges, candidate);
  for (idx = 1; idx < gt_array_size(enrichment); idx++) {
    GtRange *last;
    candidate = *(GtRange*) gt_array_get(enrichment, idx);
    /* fetched anew in every iteration, adding an element may move the array */
    last = (GtRange*) gt_array_get_last(chain->forwardranges);
    if (candidate.start > last->end) {
      /* no overlap -> store as new range */
      gt_array_add(chain->forwardranges, candidate);
    }
    else if (last->end < candidate.end) {
      /* overlap -> extend the last range */
      last->end = candidate.end;
    }
  }

  gt_array_delete(enrichment);
}
Beispiel #15
0
/* Prints the "classic" GeneSeqer2 MATCH line of <sa> as <MATCH_line> XML
   element to <outfp>. The opening tag carries the genomic id and strand and
   the reference id (plus the reference strand for a DNA reference). The
   children are the total alignment score, the cumulative length of the scored
   exons and the coverage together with its high type character. */
static void xml_showmatchline(GthSA *sa, unsigned int indentlevel,
                              GtFile *outfp)
{
  const unsigned int childlevel = indentlevel + 1;

  /* opening tag, written in two parts */
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<MATCH_line gen_id=\"%s\" gen_strand=\"%c\" ",
                  gth_sa_gen_id(sa), gth_sa_gen_strand_char(sa));
  if (gth_sa_alphatype(sa) != DNA_ALPHA)
    gt_file_xprintf(outfp, "ref_id=\"%s\">\n", gth_sa_ref_id(sa));
  else {
    gt_file_xprintf(outfp, "ref_id=\"%s\" ref_strand=\"%c\">\n",
                    gth_sa_ref_id(sa), gth_sa_ref_strand_char(sa));
  }

  /* score */
  gth_indent(outfp, childlevel);
  gt_file_xprintf(outfp,
                  "<total_alignment_score>%.3f</total_alignment_score>\n",
                  gth_sa_score(sa));

  /* cumulative length */
  gth_indent(outfp, childlevel);
  gt_file_xprintf(outfp, "<cumulative_length_of_scored_exons>%lu"
                  "</cumulative_length_of_scored_exons>\n",
                  gth_sa_cumlen_scored_exons(sa));

  /* coverage; the high type character is written separately */
  gth_indent(outfp, childlevel);
  gt_file_xprintf(outfp, "<coverage percentage=\"%.3f\" high_type=\"",
                  gth_sa_coverage(sa));
  gt_file_xfputc(gth_sa_coverage_char(sa), outfp);
  gt_file_xprintf(outfp, "\"/>\n");

  /* closing tag */
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</MATCH_line>\n");
}
/* Shows the exon and CDS features collected for the transcript
   <feature_node>. Both feature arrays of <gtf_visitor> are reset and the
   direct children of <feature_node> are traversed with save_exon_node()
   (presumably this fills the arrays -- the callback is not visible here).
   Afterwards the exon features and then the CDS features are sorted and each
   one is shown as a line with gene_id and transcript_id attributes. The
   transcript_id counter is incremented only if exon features exist. Returns
   the result of the traversal. */
static int gtf_show_transcript(GtFeatureNode *feature_node,
                               GtGTFVisitor *gtf_visitor, GtError *err)
{
  GtFeatureNode *current;
  GtUword idx;
  int had_err;

  gt_error_check(err);
  gt_assert(feature_node && gtf_visitor);

  gt_array_reset(gtf_visitor->exon_features);
  gt_array_reset(gtf_visitor->CDS_features);
  had_err = gt_feature_node_traverse_direct_children(feature_node, gtf_visitor,
                                                     save_exon_node, err);

  /* exon features */
  if (gt_array_size(gtf_visitor->exon_features) > 0) {
    qsort(gt_array_get_space(gtf_visitor->exon_features),
          gt_array_size(gtf_visitor->exon_features), sizeof (GtGenomeNode*),
          (GtCompare) gt_genome_node_compare);
    gtf_visitor->transcript_id++;
    idx = 0;
    while (idx < gt_array_size(gtf_visitor->exon_features)) {
      current = *(GtFeatureNode**) gt_array_get(gtf_visitor->exon_features,
                                                idx);
      gt_gff3_output_leading(current, gtf_visitor->outfp);
      gt_file_xprintf(gtf_visitor->outfp,
                      "gene_id \"" GT_WU "\"; transcript_id \"" GT_WU "."
                      GT_WU "\";\n", gtf_visitor->gene_id,
                      gtf_visitor->gene_id, gtf_visitor->transcript_id);
      idx++;
    }
  }

  /* CDS features */
  if (gt_array_size(gtf_visitor->CDS_features) > 0) {
    qsort(gt_array_get_space(gtf_visitor->CDS_features),
          gt_array_size(gtf_visitor->CDS_features), sizeof (GtGenomeNode*),
          (GtCompare) gt_genome_node_compare);

    /* XXX: showing the start_codon feature is still to be done; the first CDS
       feature is accessed, but its value is not used yet */
    (void) gt_array_get(gtf_visitor->CDS_features, 0);

    idx = 0;
    while (idx < gt_array_size(gtf_visitor->CDS_features)) {
      current = *(GtFeatureNode**) gt_array_get(gtf_visitor->CDS_features,
                                                idx);
      gt_gff3_output_leading(current, gtf_visitor->outfp);
      gt_file_xprintf(gtf_visitor->outfp,
                      "gene_id \"" GT_WU "\"; transcript_id \"" GT_WU "."
                      GT_WU "\";\n", gtf_visitor->gene_id,
                      gtf_visitor->gene_id, gtf_visitor->transcript_id);
      idx++;
    }
    /* XXX: show stop_codon feature and shorten last CDS feature */
  }

  return had_err;
}
/* Tells the user on <outfp> that no significant matches were found. The kind
   of match named in the message ("EST" or "protein") depends on
   <overallalphatype>; any other alphabet type trips an assertion. */
static void show_no_match_line(GthAlphatype overallalphatype, GtFile *outfp)
{
  const char *match_kind = ""; /* nothing is inserted for unknown types */

  gt_file_xprintf(outfp, "\nNo significant ");

  switch (overallalphatype)
  {
    case DNA_ALPHA:
      match_kind = "EST";
      break;
    case PROTEIN_ALPHA:
      match_kind = "protein";
      break;
    default:
      gt_assert(0);
  }

  gt_file_xprintf(outfp, "%s", match_kind);
  gt_file_xprintf(outfp, " matches were found.\n");
}
Beispiel #18
0
/* Writes the XML declaration and the opening tag of the root element to
   <outfp>. The root element is <SplicedAlignment> (with
   GTH_SPLICED_ALIGNMENT_XML_VERSION) for intermediate output and <GTH_output>
   (with GTH_XML_VERSION) otherwise. */
void gth_xml_show_leader(bool intermediate, GtFile *outfp)
{
  gt_file_xprintf(outfp, "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n");

  if (!intermediate) {
    /* final output */
    gt_file_xprintf(outfp,
                    "<GTH_output xmlns="
                    "\"http://www.genomethreader.org/GTH_output/\" "
                    "GTH_XML_version=\"%s\">\n",
                    GTH_XML_VERSION);
    return;
  }

  /* intermediate output */
  gt_file_xprintf(outfp,
                  "<SplicedAlignment xmlns="
                  "\"http://www.GenomeThreader.org/SplicedAlignment/\" "
                  "GTH_spliced_alignment_XML_version=\"%s\">\n",
                  GTH_SPLICED_ALIGNMENT_XML_VERSION);
}
Beispiel #19
0
/* Shows statistics for <bs> on <outfp>: the name of the sequence file, the
   number of sequences, the total length (computed as the total length of the
   encoded sequence minus the number of sequences plus 1) and the length of
   every single sequence, numbered from 1. */
void gt_bioseq_show_stat(GtBioseq *bs, GtFile *outfp)
{
  GtUword seqnum, num_of_seqs;

  gt_assert(bs);
  num_of_seqs = gt_bioseq_number_of_sequences(bs);

  /* summary */
  gt_file_xprintf(outfp, "showing statistics for sequence file \"%s\"\n",
                  gt_str_get(bs->sequence_file));
  gt_file_xprintf(outfp, "number of sequences: " GT_WU "\n", num_of_seqs);
  gt_file_xprintf(outfp, "total length: " GT_WU "\n",
                  gt_encseq_total_length(bs->encseq)
                    - gt_encseq_num_of_sequences(bs->encseq) + 1);

  /* one line per sequence */
  seqnum = 0;
  while (seqnum < num_of_seqs) {
    gt_file_xprintf(outfp, "sequence #" GT_WU " length: " GT_WU "\n",
                    seqnum + 1, gt_bioseq_get_sequence_length(bs, seqnum));
    seqnum++;
  }
}
/* Prints the plain text AGS line for <ags> to <outfp>: "AGS-" followed by the
   serial number (<agsnum> + OUTPUTOFFSET) and, in parentheses, the comma
   separated start and end positions of all exons. Every position is passed
   through SHOWGENPOSAGS() before it is printed. */
static void outputAGSline(const GthAGS *ags, GtUword agsnum,
                          GtFile *outfp)
{
  GtUword exonnum = 0;

  gt_file_xprintf(outfp, "AGS-" GT_WU " (", agsnum + OUTPUTOFFSET);

  while (exonnum < gth_ags_num_of_exons(ags)) {
    GthExonAGS *current = gth_ags_get_exon(ags, exonnum);
    /* separator in front of every exon but the first */
    if (exonnum != 0) {
      gt_file_xfputc(',', outfp);
    }
    gt_file_xprintf(outfp, GT_WU "  " GT_WU,
                    SHOWGENPOSAGS(current->range.start),
                    SHOWGENPOSAGS(current->range.end));
    exonnum++;
  }

  gt_file_xprintf(outfp, ")\n");
}
Beispiel #21
0
/* Visits the meta node <mn>. As long as the version string has not been
   shown: if the directive of <mn> starts with the GFF or GVF version
   directive, the version string is marked as shown, otherwise
   gff3_version_string() is called. Afterwards the line "##<directive> <data>"
   is written to the output file of the visitor or, if the visitor has an
   output string, appended to that string. Always returns 0. */
static int gff3_visitor_meta_node(GtNodeVisitor *nv, GtMetaNode *mn,
                                  GT_UNUSED GtError *err)
{
  GtGFF3Visitor *gff3_visitor;

  gt_error_check(err);
  gff3_visitor = gff3_visitor_cast(nv);
  gt_assert(nv && mn);

  if (!gff3_visitor->version_string_shown) {
    const char *directive = gt_meta_node_get_directive(mn);
    const int is_version_directive =
      strncmp(directive, GT_GFF_VERSION_DIRECTIVE,
              strlen(GT_GFF_VERSION_DIRECTIVE)) == 0 ||
      strncmp(directive, GT_GVF_VERSION_DIRECTIVE,
              strlen(GT_GVF_VERSION_DIRECTIVE)) == 0;
    if (is_version_directive)
      gff3_visitor->version_string_shown = true;
    else
      gff3_version_string(nv);
  }

  if (gff3_visitor->outstr) {
    /* output into the string */
    gt_str_append_cstr(gff3_visitor->outstr, "##");
    gt_str_append_cstr(gff3_visitor->outstr, gt_meta_node_get_directive(mn));
    gt_str_append_char(gff3_visitor->outstr, ' ');
    gt_str_append_cstr(gff3_visitor->outstr, gt_meta_node_get_data(mn));
    gt_str_append_char(gff3_visitor->outstr, '\n');
  }
  else {
    /* output into the file */
    gt_file_xprintf(gff3_visitor->outfp, "##%s %s\n",
                    gt_meta_node_get_directive(mn),
                    gt_meta_node_get_data(mn));
  }

  return 0;
}
Beispiel #22
0
/* Visits the region node <rn>. After gff3_version_string() has been called,
   the sequence region line (GT_GFF_SEQUENCE_REGION, sequence id, start and
   end of <rn>) is written to the output file of the visitor or, if the
   visitor has an output string, appended to that string. Always returns 0. */
static int gff3_visitor_region_node(GtNodeVisitor *nv, GtRegionNode *rn,
                                    GT_UNUSED GtError *err)
{
  GtGFF3Visitor *gff3_visitor;
  GtGenomeNode *region = (GtGenomeNode*) rn;

  gt_error_check(err);
  gff3_visitor = gff3_visitor_cast(nv);
  gt_assert(nv && rn);
  gff3_version_string(nv);

  if (gff3_visitor->outstr) {
    /* output into the string, piece by piece */
    gt_str_append_cstr(gff3_visitor->outstr, GT_GFF_SEQUENCE_REGION);
    gt_str_append_cstr(gff3_visitor->outstr, "   ");
    gt_str_append_cstr(gff3_visitor->outstr,
                       gt_str_get(gt_genome_node_get_seqid(region)));
    gt_str_append_char(gff3_visitor->outstr, ' ');
    gt_str_append_ulong(gff3_visitor->outstr,
                        gt_genome_node_get_start(region));
    gt_str_append_char(gff3_visitor->outstr, ' ');
    gt_str_append_ulong(gff3_visitor->outstr, gt_genome_node_get_end(region));
    gt_str_append_char(gff3_visitor->outstr, '\n');
  }
  else {
    /* output into the file */
    gt_file_xprintf(gff3_visitor->outfp, "%s   %s " GT_WU " " GT_WU "\n",
                    GT_GFF_SEQUENCE_REGION,
                    gt_str_get(gt_genome_node_get_seqid(region)),
                    gt_genome_node_get_start(region),
                    gt_genome_node_get_end(region));
  }

  return 0;
}
/* Shows the alternative gene structure <ags> in plain text format on the
   output file of <out>: the AGS line, the SCR line, the exon/intron lines,
   the PGS lines and the translation/ORF output, followed by three newlines. */
static void show_ags(const GthAGS *ags, GtUword pglnum,
                     GtUword agsnum, GtUword translationtable,
                     GthInput *input, unsigned int indentlevel, GthOutput *out)
{
  GtFile *outfp = out->outfp;

  /* the individual parts, in their fixed output order */
  outputAGSline(ags, agsnum, outfp);
  outputSCRline(ags, outfp);
  output_exon_intron_lines(ags, out->widthforgenpos, outfp);
  outputPGSlines(ags->alignments, outfp);

  /* 3-phase translation */
  gt_outputtranslationandorf(pglnum, ags, agsnum, translationtable, input,
                             indentlevel, out);

  /* separate this AGS from whatever follows */
  gt_file_xprintf(outfp, "\n\n\n");
}
Beispiel #24
0
/* Visits the sequence node <sn>. After gff3_version_string() has been called,
   the FASTA directive is emitted once per visitor (tracked by
   fasta_directive_shown), followed by the FASTA entry of <sn>. All output goes
   to the output string of the visitor if it has one and to its output file
   otherwise. Always returns 0. */
static int gff3_visitor_sequence_node(GtNodeVisitor *nv, GtSequenceNode *sn,
                                      GT_UNUSED GtError *err)
{
  GtGFF3Visitor *gff3_visitor;

  gt_error_check(err);
  gff3_visitor = gff3_visitor_cast(nv);
  gt_assert(nv && sn);
  gff3_version_string(nv);

  if (gff3_visitor->outstr) {
    /* output into the string */
    if (!gff3_visitor->fasta_directive_shown) {
      gt_str_append_cstr(gff3_visitor->outstr, GT_GFF_FASTA_DIRECTIVE);
      gt_str_append_char(gff3_visitor->outstr, '\n');
      gff3_visitor->fasta_directive_shown = true;
    }
    gt_fasta_show_entry_str(gt_sequence_node_get_description(sn),
                            gt_sequence_node_get_sequence(sn),
                            gt_sequence_node_get_sequence_length(sn),
                            gff3_visitor->fasta_width, gff3_visitor->outstr);
  }
  else {
    /* output into the file */
    if (!gff3_visitor->fasta_directive_shown) {
      gt_file_xprintf(gff3_visitor->outfp, "%s\n", GT_GFF_FASTA_DIRECTIVE);
      gff3_visitor->fasta_directive_shown = true;
    }
    gt_fasta_show_entry(gt_sequence_node_get_description(sn),
                        gt_sequence_node_get_sequence(sn),
                        gt_sequence_node_get_sequence_length(sn),
                        gff3_visitor->fasta_width, gff3_visitor->outfp);
  }

  return 0;
}
Beispiel #25
0
/* Creates the manual page source of the tool <toolname> in directory
   <outdir>. The directory is created if it does not exist. The file is named
   after <toolname> with every ' ' replaced by '_' and the suffix ".mansrc"; its
   content is produced by gt_option_parser_manpage() for <option_parser>.
   Returns 0 on success and a non-zero value if the file could not be opened or
   the manual page could not be generated (see <err>). */
static int create_manpage(const char *outdir, const char *toolname,
                          GtOptionParser *option_parser, GtError *err)
{
  GtFile *outfile = NULL;
  GtStr *man, *pathbuf;
  char *filename_stem;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(outdir && toolname && option_parser);

  man = gt_str_new();
  pathbuf = gt_str_new_cstr(outdir);

  /* tool names can contain spaces, file names should not */
  filename_stem = gt_cstr_dup(toolname);
  gt_cstr_rep(filename_stem, ' ', '_');

  if (!gt_file_exists(gt_str_get(pathbuf))) {
    gt_xmkdir(gt_str_get(pathbuf));
  }

  /* <outdir>/<stem>.mansrc */
  gt_str_append_char(pathbuf, GT_PATH_SEPARATOR);
  gt_str_append_cstr(pathbuf, filename_stem);
  gt_str_append_cstr(pathbuf, ".mansrc");
  gt_free(filename_stem);

  outfile = gt_file_new(gt_str_get(pathbuf), "w+", err);
  if (!outfile) {
    had_err = -1;
  }
  else {
    had_err = gt_option_parser_manpage(option_parser, toolname, man, err);
    if (!had_err) {
      gt_file_xprintf(outfile, "%s", gt_str_get(man));
    }
  }

  gt_file_delete(outfile);
  gt_str_delete(pathbuf);
  gt_str_delete(man);

  return had_err;
}
/* Shows the spliced alignment <sa> in plain text format on <outfp>: a
   delimiter line, the reference information and the reference sequence (for
   non-GS2 output also the genomic information), the alignment header and the
   alignment itself. With <gs2out> set the GS2 flavour of the reference
   information is used and wildcard implosion is enabled for the alignment. */
static void show_spliced_alignment(GthSA *sa, GthInput *input, bool gs2out,
                                   GtUword minintronlength,
                                   GtUword widthforgenpos,
                                   GtUword showintronmaxlen,
                                   GtUword translationtable,
                                   bool showseqnums, GtFile *outfp)
{
  /* with wildcard implosion all wildcards (N,S,Y,W,R,K,V,B,D,H,M) are replaced
     by the wildcard N, which makes only sense for a DNA alphabet; it is used
     for GS2 output only */
  const bool wildcardimplosion = gs2out;

  showdelimiterline(outfp);

  /* reference information */
  if (gs2out)
    showgs2referenceinformation(sa, outfp);
  else
    showgthreferenceinformation(sa, input, showseqnums, outfp);

  gth_sa_echo_reference_sequence(sa, input, true, outfp);

  /* genomic information (not part of the GS2 output) */
  if (!gs2out)
    showgthgenomicinformation(sa, input, showseqnums, outfp);

  showalignmentheader(sa, gs2out, widthforgenpos, minintronlength, outfp);

  gt_file_xprintf(outfp,
                  "Alignment (genomic DNA sequence = upper lines):\n\n");

  gth_sa_echo_alignment(sa, showintronmaxlen, translationtable,
                        wildcardimplosion, input, outfp);
}
Beispiel #27
0
/* Shows the GC-, AT- and N-content of the first <len> characters of <seq> as
   percentages on <outfp>. Every character is encoded with <alphabet> (which
   must be a DNA alphabet) and compared with the codes of 'G'/'C', 'A'/'T' and
   'N'; any other code trips an assertion.

   For <len> == 0 all three values are reported as 0.00%. Previously the
   percentages were computed as 0.0 / 0 in this case, which printed "nan". */
void gt_gc_content_show(const char *seq, unsigned long len,
                        GtAlphabet *alphabet, GtFile *outfp)
{
  unsigned long i,
                gc = 0, /* number of G/C bases */
                at = 0, /* number of A/T bases */
                n  = 0; /* number of N   bases */
  unsigned int a_code, c_code, g_code, t_code, n_code, cc;
  double gc_percent = 0.0, at_percent = 0.0, n_percent = 0.0;
  gt_assert(seq && alphabet);
  gt_assert(gt_alphabet_is_dna(alphabet));
  a_code = gt_alphabet_encode(alphabet, 'A');
  c_code = gt_alphabet_encode(alphabet, 'C');
  g_code = gt_alphabet_encode(alphabet, 'G');
  t_code = gt_alphabet_encode(alphabet, 'T');
  n_code = gt_alphabet_encode(alphabet, 'N');
  /* classify every base */
  for (i = 0; i < len; i++) {
    cc = gt_alphabet_encode(alphabet, seq[i]);
    if (cc == g_code || cc == c_code)
      gc++;
    else if (cc == a_code || cc == t_code)
      at++;
    else if (cc == n_code)
      n++;
    else {
      gt_assert(0);
    }
  }
  /* an empty sequence keeps the initial 0.0 values, no division by zero */
  if (len > 0) {
    gc_percent = ((double) gc / len) * 100.0;
    at_percent = ((double) at / len) * 100.0;
    n_percent  = ((double) n  / len) * 100.0;
  }
  gt_file_xprintf(outfp, "GC-content: %.2f%% (AT-content: %.2f%%, "
                         "N-content: %.2f%%)\n",
                  gc_percent, at_percent, n_percent);
}
/* Prints one "Exon" line per exon of <ags> to <outfp> (number, borders,
   length and score) and, in front of every exon but the first, an "Intron"
   line for the gap between the previous exon and this one (number, borders,
   length and the donor/acceptor site probabilities stored at the index of the
   previous exon). Positions are passed through SHOWGENPOSAGS() and printed
   with a field width of <widthforgenpos>. The output is terminated by an
   empty line. */
static void output_exon_intron_lines(const GthAGS *ags, int widthforgenpos,
                                     GtFile *outfp)
{
  GtUword exonnum,
          intron_start = GT_UNDEF_UWORD; /* defined after the first exon */

  for (exonnum = 0; exonnum < gt_array_size(ags->exons); exonnum++) {
    GthExonAGS *current = (GthExonAGS*) gt_array_get(ags->exons, exonnum);
    const GtUword exon_start = current->range.start,
                  exon_end = current->range.end;
    GthDbl exon_score = current->score;

    if (exonnum > 0) {
      /* the intron between the previous exon and this one */
      GthSpliceSiteProb *probs = (GthSpliceSiteProb*)
                                 gt_array_get(ags->splicesiteprobs,
                                              exonnum - 1);
      const GtUword intron_end = exon_start - 1;
      GthFlt donor_prob = probs->donorsiteprob,
             acceptor_prob = probs->acceptorsiteprob;

      gt_file_xprintf(outfp,
                      "    Intron %2" GT_WUS " %*" GT_WUS " %*" GT_WUS " (%4"
                      GT_WUS " n);           " "Pd: %5.3f  Pa: %5.3f\n",
                      exonnum - 1 + OUTPUTOFFSET, widthforgenpos,
                      SHOWGENPOSAGS(intron_start), widthforgenpos,
                      SHOWGENPOSAGS(intron_end),
                      intron_end - intron_start + 1,
                      donor_prob, acceptor_prob);
    }

    /* the next intron starts right after this exon */
    intron_start = exon_end + 1;

    gt_file_xprintf(outfp,
                    "  Exon %2" GT_WUS " %*" GT_WUS " %*" GT_WUS " (%4" GT_WUS
                    " n); score: %5.3f\n",
                    exonnum + OUTPUTOFFSET, widthforgenpos,
                    SHOWGENPOSAGS(exon_start), widthforgenpos,
                    SHOWGENPOSAGS(exon_end), exon_end - exon_start + 1,
                    exon_score);
  }

  gt_file_xfputc('\n', outfp);
}
Beispiel #29
0
/* Post processes the potential introns of <chain>. The chain is converted
   into an inverted chain, potentialintronspostpro() is applied to the forward
   ranges of the inverted chain with <icdelta> and <icminremintronlength>, and
   the result is converted back into <chain>, whose forward and reverse ranges
   are emptied beforehand. <gen_total_length> and <gen_offset> are needed for
   the conversions. If <comments> is set, the forward ranges before and after
   the post processing are shown on <outfp>. */
void gth_chain_shorten_introns(GthChain *chain, unsigned long icdelta,
                               unsigned long icminremintronlength,
                               unsigned long gen_total_length,
                               unsigned long gen_offset, bool comments,
                               GtFile *outfp)
{
  GthInvertedChain inverted;

  gt_assert(chain);

  inverted_chain_init(&inverted);

  if (comments) {
    gt_file_xprintf(outfp,
                    "%c forward DP ranges (before post processing of "
                    "potential introns):\n",
                    COMMENTCHAR);
    gt_file_xprintf(outfp, "%c ", COMMENTCHAR);
    gt_ranges_show(chain->forwardranges, outfp);
  }

  /* chain -> inverted chain */
  convert_chain_to_inverted_chain(&inverted, chain);
  gt_assert(conversion_is_correct(chain, &inverted, gen_total_length,
                                  gen_offset));

  /* the actual post processing */
  potentialintronspostpro(inverted.forwardranges, icdelta,
                          icminremintronlength);

  /* empty the chain, then inverted chain -> chain */
  gt_array_set_size(chain->forwardranges, 0);
  gt_array_set_size(chain->reverseranges, 0);
  convert_inverted_chain_to_chain(chain, &inverted, gen_total_length,
                                  gen_offset);

  if (comments) {
    gt_file_xprintf(outfp,
                    "%c forward DP ranges (after post processing of "
                    "potential introns):\n",
                    COMMENTCHAR);
    gt_file_xprintf(outfp, "%c ", COMMENTCHAR);
    gt_ranges_show(chain->forwardranges, outfp);
  }

  inverted_chain_free(&inverted);
}
/* Prints the plain text SCR line for <ags> to <outfp>. Every exon except the
   last one is shown with its score and the donor and acceptor site
   probabilities stored at the same index in ags->splicesiteprobs; the last
   exon is shown with its score only. The line is followed by an empty line.

   An <ags> without exons yields "SCR   ()". Previously the loop bound
   "gt_array_size(ags->exons) - 1" wrapped around for an empty array (the size
   is unsigned), which led to out-of-bounds array accesses. */
static void outputSCRline(const GthAGS *ags, GtFile *outfp)
{
  GthSpliceSiteProb *splicesiteprob;
  const GtUword num_exons = gt_array_size(ags->exons);
  GtUword i;

  gt_file_xprintf(outfp, "SCR   (");
  /* "i + 1 < num_exons" cannot wrap around, unlike "i < num_exons - 1" */
  for (i = 0; i + 1 < num_exons; i++) {
    splicesiteprob = (GthSpliceSiteProb*) gt_array_get(ags->splicesiteprobs, i);
    gt_file_xprintf(outfp, "e %5.3f  d %5.3f a %5.3f,",
                    ((GthExonAGS*) gt_array_get(ags->exons, i))->score,
                    splicesiteprob->donorsiteprob,
                    splicesiteprob->acceptorsiteprob);
  }
  if (num_exons > 0) {
    /* the last exon has no following intron */
    gt_file_xprintf(outfp, "e %5.3f)\n",
                    ((GthExonAGS*)
                     gt_array_get(ags->exons, num_exons - 1))->score);
  }
  else {
    /* nothing to show, just close the parenthesis */
    gt_file_xprintf(outfp, ")\n");
  }
  gt_file_xfputc('\n', outfp);
}