コード例 #1
0
ファイル: sa.c プロジェクト: AnnSeidel/genometools
/*
  Show the spliced alignment <sa> on <outfp>.

  The genomic and the reference file referenced by <sa> are loaded into
  <input> first. Afterwards a text SA visitor is created with the default
  output settings, applied to <sa>, and deleted again.
  Both <sa> and <input> must not be NULL.
*/
void gth_sa_show(GthSA *sa, GthInput *input, GtFile *outfp)
{
  GthSAVisitor *txt_visitor;

  gt_assert(sa && input);

  /* load the files this alignment refers to */
  gth_input_load_genomic_file(input, sa->gen_file_num, false);
  gth_input_load_reference_file(input, sa->ref_file_num, false);

  /* all visitor parameters are defaults, except for the hard-coded 6 */
  txt_visitor = gth_txt_sa_visitor_new(input, GTH_DEFAULT_GS2OUT,
                                       GTH_DEFAULT_DPMININTRONLENGTH,
                                       6, /* XXX */
                                       GTH_DEFAULT_SHOWINTRONMAXLEN,
                                       GTH_DEFAULT_TRANSLATIONTABLE,
                                       GTH_DEFAULT_SHOWSEQNUMS, outfp);

  gth_sa_visitor_visit_sa(txt_visitor, sa);
  gth_sa_visitor_delete(txt_visitor);
}
コード例 #2
0
/*
  Compute matches between genomic file <gen_file_num> and reference file
  <ref_file_num> with the matcher supplied in <plugins>, build chains from
  these matches, store them in <chain_collection>, and sort the collection.

  <plugins> must provide matcher_arguments_new, matcher_arguments_delete and
  matcher_runner. The matcher parameters are taken from
  <call_info>->simfilterparam.

  The currently loaded input is freed before the matcher runs. The genomic and
  reference files are loaded again afterwards only if at least one match has
  been found; if no match has been found the function returns early (after
  optionally printing a comment line) and <chain_collection> is not touched
  by the chaining step.

  If a maximum number of matches is set or <stat> collects the match number
  distribution, a per-reference-sequence match counter is allocated for the
  duration of the matcher run.
*/
void gth_chaining(GthChainCollection *chain_collection,
                  GtUword gen_file_num,
                  GtUword ref_file_num,
                  GthCallInfo *call_info,
                  GthInput *input,
                  GthStat *stat,
                  bool directmatches,
                  const GthPlugins *plugins)
{
  GtUword i, numofsequences = 0;
  GtArray *matches;
  GthChainingInfo chaining_info;
  void *matcher_arguments;
  GtFile *outfp = call_info->out->outfp;
  GthMatchProcessorInfo match_processor_info;
  bool refseqisdna = gth_input_ref_file_is_dna(input, ref_file_num);

  /* make sure matcher is defined */
  gt_assert(plugins);
  gt_assert(plugins->matcher_arguments_new);
  gt_assert(plugins->matcher_arguments_delete);
  gt_assert(plugins->matcher_runner);

  /* init */
  matches = gt_array_new(sizeof (GthMatch));

  chaining_info_init(&chaining_info, directmatches, refseqisdna, call_info,
                     input, stat, gen_file_num, ref_file_num);

  /* The two filename arguments are swapped depending on the mode: if the
     inverse option is set or the reference is not DNA, the genomic filename
     is passed first and the reference filename second; otherwise the
     reference filename comes first.
     NOTE(review): presumably the first one is the index and the second one
     the query -- confirm against matcher_arguments_new(). */
  matcher_arguments =
    plugins->matcher_arguments_new(true,
                          input,
                          call_info->simfilterparam.inverse || !refseqisdna
                          ? gth_input_get_genomic_filename(input, gen_file_num)
                          : gth_input_get_reference_filename(input,
                                                             ref_file_num),
                          call_info->simfilterparam.inverse || !refseqisdna
                          ? gth_input_get_reference_filename(input,
                                                             ref_file_num)
                          : gth_input_get_genomic_filename(input, gen_file_num),
                          directmatches,
                          refseqisdna,
                          call_info->progname,
                          gt_str_get(gth_input_proteinsmap(input)),
                          call_info->simfilterparam.exact,
                          call_info->simfilterparam.edist,
                          false,
                          0,
                          call_info->simfilterparam.minmatchlength,
                          call_info->simfilterparam.seedlength,
                          call_info->simfilterparam.exdrop,
                          call_info->simfilterparam.prminmatchlen,
                          call_info->simfilterparam.prseedlength,
                          call_info->simfilterparam.prhdist,
                          call_info->translationtable,
                          call_info->simfilterparam.online,
                          call_info->simfilterparam.noautoindex,
                          call_info->simfilterparam.maskpolyAtails,
                          false);

  match_processor_info_init(&match_processor_info, matches, chain_collection,
                            directmatches, refseqisdna,
                            call_info->simfilterparam.online,
                            call_info->simfilterparam.inverse, stat,
                            &chaining_info,
                            call_info->simfilterparam.maxnumofmatches,
                            call_info->simfilterparam.rare,
                            call_info->fragweightfactor,
                            plugins->jump_table_new,
                            plugins->jump_table_new_reverse,
                            plugins->jump_table_delete);

  /* the counter is needed for limiting the number of matches as well as for
     the match number distribution statistics */
  if (call_info->simfilterparam.maxnumofmatches > 0 ||
      gth_stat_get_matchnumdistri(stat)) {
    /* alloc space of match number counter (one entry per reference
       sequence) */
    numofsequences = gth_input_num_of_ref_seqs(input, ref_file_num);
    match_processor_info.matchnumcounter = gt_malloc(sizeof (GtUword) *
                                                     numofsequences);

    /* init match number counter to 0 */
    memset(match_processor_info.matchnumcounter, 0,
           (size_t) numofsequences * sizeof (GtUword));
  }

  /* free input, which contains the virtual trees,
     because vmatch loads the virtual trees into memory, too.
     This prevents the virtual trees from being loaded twice. */
  gth_input_delete_current(input);

  /* call matcher (the verbose callback is optional and may be NULL) */
  if (call_info->out->showverbose)
    call_info->out->showverbose("call vmatch to compute matches");

  plugins->matcher_runner(matcher_arguments, call_info->out->showverbose,
                          call_info->out->showverboseVM, &match_processor_info);

  /* free matcher stuff here, because otherwise the reference file is mapped
     twice below */
  plugins->matcher_arguments_delete(matcher_arguments);

  /* free sequence collections (if they have been filled by the matcher) */
  gth_seq_con_delete(match_processor_info.gen_seq_con);
  gth_seq_con_delete(match_processor_info.ref_seq_con);

  /* save match numbers of match number distribution, if necessary;
     only reference sequences with at least one match are recorded */
  if (gth_stat_get_matchnumdistri(stat)) {
    for (i = 0; i < numofsequences; i++) {
      if (match_processor_info.matchnumcounter[i] > 0) {
        gth_stat_add_to_matchnumdistri(stat,
                                      match_processor_info.matchnumcounter[i]);
      }
    }
  }

  /* free match number counter
     NOTE(review): this is done unconditionally, i.e., it relies on
     match_processor_info_init() having initialized matchnumcounter
     (presumably to NULL) when it was not allocated above -- confirm. */
  gt_free(match_processor_info.matchnumcounter);

  /* return if no match has been found; note that the input files are not
     loaded back into memory on this path */
  if (!gt_array_size(matches)) {
    if (call_info->out->comments)
      gt_file_xprintf(outfp, "%c no match has been found\n", COMMENTCHAR);
    gt_array_delete(matches);
    return;
  }

  /* load genomic file back into memory */
  gth_input_load_genomic_file(input, gen_file_num, true);

  /* load reference file back into memory */
  gth_input_load_reference_file(input, ref_file_num, true);

  /* compute chains from matches */
  calc_chains_from_matches(chain_collection, matches, &chaining_info,
                           gth_input_current_gen_seq_con(input),
                           gth_input_current_ref_seq_con(input),
                           call_info->simfilterparam.rare,
                           call_info->fragweightfactor,
                           plugins->jump_table_new,
                           plugins->jump_table_new_reverse,
                           plugins->jump_table_delete);

  if (call_info->out->showverbose) {
    call_info->out->showverbose("sort global chains according to reference "
                                "sequence coverage");
  }

  /* sort chains */
  gth_chain_collection_sort(chain_collection);

  /* free */
  gt_array_delete(matches);
}
コード例 #3
0
ファイル: sa.c プロジェクト: AnnSeidel/genometools
/*
  Compute the alignment lines of the spliced alignment <sa>.

  The reference file of <sa> is loaded into <input> if necessary. Depending on
  the alphabet type of <sa> either two (DNA) or three (protein) alignment
  lines are computed and stored in <*first_line>, <*second_line> and
  <*third_line>. In the DNA case <*third_line> is set to NULL.
  <translationtable> is only used in the protein case.

  Returns the value delivered by the line filling function (presumably the
  number of alignment columns -- confirm against gthfillthetwoalignmentlines()
  and gthfillthethreealignmentlines()). For an unknown alphabet type an
  assertion fails; if assertions are disabled, 0 is returned and the line
  pointers are left untouched.

  None of the pointer arguments may be NULL.
*/
GtUword gth_sa_get_alignment_lines(const GthSA *sa,
                                         unsigned char **first_line,
                                         unsigned char **second_line,
                                         unsigned char **third_line,
                                         GtUword translationtable,
                                         GthInput *input)
{
  GtUword genomicstartcutoff, genomicendcutoff, genomictotalcutoff,
          referencestartcutoff, referenceendcutoff, referencetotalcutoff,
          cols = 0;
  unsigned char *gen_seq_orig, *ref_seq_orig;
  GthSeqCon *ref_seq_con;

  gt_assert(sa && first_line && second_line && third_line && input);

  /* only for cosmetic reasons */
  genomicstartcutoff   = gth_sa_genomiccutoff_start(sa);
  genomicendcutoff     = gth_sa_genomiccutoff_end(sa);
  genomictotalcutoff   = genomicstartcutoff + genomicendcutoff;
  referencestartcutoff = gth_sa_referencecutoff_start(sa);
  referenceendcutoff   = gth_sa_referencecutoff_end(sa);
  referencetotalcutoff = referencestartcutoff + referenceendcutoff;

  /* make sure that the correct files are loaded */
  gth_input_load_reference_file(input, gth_sa_ref_file_num(sa), false);
  ref_seq_con = gth_input_current_ref_seq_con(input);

  /* get genomic sequence (on the strand of the alignment), starting at the
     DP start position */
  gen_seq_orig = (unsigned char*)
    gth_input_original_genomic_sequence(input, gth_sa_gen_file_num(sa),
                                        gth_sa_gen_strand_forward(sa))
    + gth_sa_gen_dp_start(sa);

  /* get reference sequence (on the strand of the alignment) */
  if (gth_sa_ref_strand_forward(sa)) {
    ref_seq_orig =
      gth_seq_con_get_orig_seq(ref_seq_con, gth_sa_ref_seq_num(sa));
  }
  else {
    ref_seq_orig =
      gth_seq_con_get_orig_seq_rc(ref_seq_con, gth_sa_ref_seq_num(sa));
  }

  /* in both cases the cutoffs are removed from the sequences passed on */
  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA:
      /* compute the two alignment lines */
      cols = gthfillthetwoalignmentlines(first_line,
                                         second_line,
                                         gen_seq_orig +
                                         genomicstartcutoff,
                                         gth_sa_gen_dp_length(sa) -
                                         genomictotalcutoff,
                                         ref_seq_orig +
                                         referencestartcutoff,
                                         gth_sa_ref_total_length(sa) -
                                         referencetotalcutoff,
                                         gth_sa_get_editoperations(sa),
                                         gth_sa_get_editoperations_length(sa),
                                         0,   /* linewidth not important here */
                                         0,   /* no short introns here */
                                         NULL,/* therefore no shortintroninfo */
                                         gth_sa_indelcount(sa));
      *third_line = NULL;
      break;
    case PROTEIN_ALPHA:
      /* compute the three alignment lines */
      cols = gthfillthethreealignmentlines(first_line,
                                           second_line,
                                           third_line,
                                           gth_sa_get_editoperations(sa),
                                           gth_sa_get_editoperations_length(sa),
                                           gth_sa_indelcount(sa),
                                           gen_seq_orig +
                                           genomicstartcutoff,
                                           gth_sa_gen_dp_length(sa) -
                                           genomictotalcutoff,
                                           ref_seq_orig +
                                           referencestartcutoff,
                                           gth_sa_ref_total_length(sa) -
                                           referencetotalcutoff,
                                           translationtable);
      break;
    default: gt_assert(0);
  }

  return cols;
}
コード例 #4
0
ファイル: sa.c プロジェクト: AnnSeidel/genometools
/*
  Print the alignment of the spliced alignment <sa> to <outfp>.

  The reference file of <sa> is loaded into <input> if necessary. Depending on
  the alphabet type of <sa> the alignment is shown by gthshowalignmentdna() or
  gthshowalignmentprotein(); <translationtable> and the score matrix of
  <input> are only used in the protein case. <showintronmaxlen> and
  <wildcardimplosion> are handed over to the show function unchanged.

  <sa> and <input> must not be NULL. An unknown alphabet type makes an
  assertion fail.
*/
void gth_sa_echo_alignment(const GthSA *sa, GtUword showintronmaxlen,
                           GtUword translationtable,
                           bool wildcardimplosion, GthInput *input,
                           GtFile *outfp)
{
  GtUword gen_cut_start, gen_cut_end, gen_cut_total,
          ref_cut_start, ref_cut_end, ref_cut_total;
  const unsigned char *gen_seq_shown, *ref_seq_shown;
  GthSeqCon *ref_seqs;
  GtAlphabet *ref_alpha;
  bool reverse_subject_pos;

  gt_assert(sa && input);

  /* the cutoffs exist only for cosmetic reasons */
  gen_cut_start = gth_sa_genomiccutoff_start(sa);
  gen_cut_end   = gth_sa_genomiccutoff_end(sa);
  gen_cut_total = gen_cut_start + gen_cut_end;
  ref_cut_start = gth_sa_referencecutoff_start(sa);
  ref_cut_end   = gth_sa_referencecutoff_end(sa);
  ref_cut_total = ref_cut_start + ref_cut_end;

  /* make sure that the correct files are loaded */
  gth_input_load_reference_file(input, gth_sa_ref_file_num(sa), false);
  ref_seqs  = gth_input_current_ref_seq_con(input);
  ref_alpha = gth_input_current_ref_alphabet(input);

  /* If the reverse complement of the genomic DNA is considered, this
     option is needed for correct output of the genomic sequence positions
     by the show functions called below. */
  reverse_subject_pos = !gth_sa_gen_strand_forward(sa);

  /* genomic sequence, starting at the DP start position ... */
  gen_seq_shown =
    gth_input_original_genomic_sequence(input, sa->gen_file_num,
                                        sa->gen_strand_forward)
    + gth_sa_gen_dp_start(sa);
  /* ... with the leading cutoff skipped */
  gen_seq_shown += gen_cut_start;

  /* reference sequence on the strand of the alignment ... */
  ref_seq_shown = gth_sa_ref_strand_forward(sa)
                  ? gth_seq_con_get_orig_seq(ref_seqs, gth_sa_ref_seq_num(sa))
                  : gth_seq_con_get_orig_seq_rc(ref_seqs,
                                                gth_sa_ref_seq_num(sa));
  /* ... with the leading cutoff skipped */
  ref_seq_shown += ref_cut_start;

  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA:
      gthshowalignmentdna(outfp, ALIGNMENTLINEWIDTH,
                          gth_sa_get_editoperations(sa),
                          gth_sa_get_editoperations_length(sa),
                          gth_sa_indelcount(sa),
                          gen_seq_shown,
                          gth_sa_gen_dp_length(sa) - gen_cut_total,
                          ref_seq_shown,
                          gth_sa_ref_total_length(sa) - ref_cut_total,
                          gth_sa_gen_dp_start(sa) + gen_cut_start -
                          gth_sa_gen_offset(sa),
                          ref_cut_start,
                          gth_sa_gen_total_length(sa),
                          showintronmaxlen,
                          ref_alpha,
                          reverse_subject_pos,
                          wildcardimplosion);
      break;
    case PROTEIN_ALPHA:
      gthshowalignmentprotein(outfp, ALIGNMENTLINEWIDTH,
                              gth_sa_get_editoperations(sa),
                              gth_sa_get_editoperations_length(sa),
                              gth_sa_indelcount(sa),
                              gen_seq_shown,
                              gth_sa_gen_dp_length(sa) - gen_cut_total,
                              ref_seq_shown,
                              gth_sa_ref_total_length(sa) - ref_cut_total,
                              gth_sa_gen_dp_start(sa) + gen_cut_start -
                              gth_sa_gen_offset(sa),
                              ref_cut_start,
                              gth_sa_gen_total_length(sa),
                              showintronmaxlen,
                              ref_alpha,
                              translationtable,
                              gth_input_score_matrix(input),
                              gth_input_score_matrix_alpha(input),
                              reverse_subject_pos,
                              wildcardimplosion);
      break;
    default: gt_assert(0);
  }
}