コード例 #1
0
ファイル: sw_example.c プロジェクト: goshng/seq-align
/*
 * Example driver: run a Smith-Waterman local alignment of seq_a against seq_b
 * and print every hit returned by smith_waterman_fetch() to stdout (aligned
 * strings, start offsets and score).
 *
 * The aligner and the result buffer are created here and released before
 * returning.
 */
void align(char* seq_a, char* seq_b)
{
  // Aligner state, plus the buffer each fetched hit is written into
  sw_aligner_t *aligner = smith_waterman_new();
  alignment_t *hit = alignment_create(256);

  // Base scores
  int match_score = 1, mismatch_score = -2;
  int gap_open_score = -4, gap_extend_score = -1;

  // Leading gaps cost nothing, e.g.
  //   ACGATTT
  //   ----TTT scores +3 (with match=+1)
  char free_start_gaps = 1;

  // Trailing gaps cost nothing either, e.g.
  //   ACGATTT
  //   ACGA--- scores +4 (with match=+1)
  char free_end_gaps = 1;

  // Flags that would rule out gaps (per sequence) or mismatches: all off
  char forbid_gaps_a = 0, forbid_gaps_b = 0;
  char forbid_mismatches = 0;

  // Case-sensitive comparison flag; 0 is the usual choice for DNA and similar
  char match_case = 0;

  scoring_t scheme;
  scoring_init(&scheme,
               match_score, mismatch_score,
               gap_open_score, gap_extend_score,
               free_start_gaps, free_end_gaps,
               forbid_gaps_a, forbid_gaps_b,
               forbid_mismatches, match_case);

  // Per-pair overrides: (x, y, s) scores x in seq1 becoming y in seq2 as s.
  // Note the two directions are scored differently.
  scoring_add_mutation(&scheme, 'a', 'c', -2);
  scoring_add_mutation(&scheme, 'c', 'a', -1);

  // To prohibit aligning characters that have no override above, one could
  // additionally set: scheme.use_match_mismatch = 0;

  smith_waterman_align(seq_a, seq_b, &scheme, aligner);

  // Drain the hits one at a time until fetch reports there are no more
  for(;;)
  {
    if(!smith_waterman_fetch(aligner, hit))
    {
      break;
    }

    printf("seqA: %s [start:%zu]\n", hit->result_a, hit->pos_a);
    printf("seqB: %s [start:%zu]\n", hit->result_b, hit->pos_b);
    printf("alignment score: %i\n\n", hit->score);
  }

  // Release the aligner and the result buffer
  smith_waterman_free(aligner);
  alignment_free(hit);
}
コード例 #2
0
ファイル: tests.c プロジェクト: xujl12/BLAST
/*
 * Test: Smith-Waterman with gaps forbidden in both sequences.
 *
 * Aligns "gacag" against "tgaagt" (case-sensitive, mismatches allowed) and
 * expects the first two fetched hits to be "ga"/"ga" and then "ag"/"ag".
 *
 * Each fetch result is checked as part of the assertion: without that, a
 * fetch that produced no hit would leave the comparison reading whatever
 * happened to be in the result buffers.
 */
void sw_test_no_gaps_smith_waterman(void)
{
  sw_aligner_t *sw = smith_waterman_new();
  alignment_t *result = alignment_create(256);

  const char* seq_a = "gacag";
  const char* seq_b = "tgaagt";

  int match = 1;
  int mismatch = -2;
  int gap_open = -4;
  int gap_extend = -1;

  bool no_start_gap_penalty = false, no_end_gap_penalty = false;
  bool no_gaps_in_a = true, no_gaps_in_b = true;
  bool no_mismatches = false, case_sensitive = true;

  scoring_t scoring;
  scoring_init(&scoring, match, mismatch, gap_open, gap_extend,
               no_start_gap_penalty, no_end_gap_penalty,
               no_gaps_in_a, no_gaps_in_b,
               no_mismatches, case_sensitive);

  smith_waterman_align(seq_a, seq_b, &scoring, sw);

  // First hit: must exist and be "ga" on both sides
  bool fetched = smith_waterman_fetch(sw, result);
  ASSERT(fetched &&
         strcmp(result->result_a, "ga") == 0 &&
         strcmp(result->result_b, "ga") == 0);

  // Second hit: must exist and be "ag" on both sides
  fetched = smith_waterman_fetch(sw, result);
  ASSERT(fetched &&
         strcmp(result->result_a, "ag") == 0 &&
         strcmp(result->result_b, "ag") == 0);

  alignment_free(result);
  smith_waterman_free(sw);
}
コード例 #3
0
ファイル: lcs_cmdline.c プロジェクト: DanielTillett/seq-align
/*
 * Entry point. Expects exactly one command-line argument (a sequence),
 * aligns that sequence against itself with gaps and mismatches disallowed,
 * and prints each hit whose start in copy A lies before its start in copy B,
 * followed by the two start offsets.
 */
int main(int argc, char **argv)
{
  if(argc != 2)
  {
    print_usage(argv);
  }

  char *input = argv[1];
  size_t input_len = strlen(input);

  // Scores
  int match = 1;
  int mismatch = -1;
  int gap_open = -4;
  int gap_extend = -1;

  // Gap penalties apply at both ends
  bool free_start_gaps = false;
  bool free_end_gaps = false;

  // No gaps in either copy and no mismatches; compare case-sensitively
  bool gapless_a = true;
  bool gapless_b = true;
  bool exact_only = true;
  bool match_case = true;

  scoring_t scheme;
  scoring_init(&scheme, match, mismatch, gap_open, gap_extend,
               free_start_gaps, free_end_gaps,
               gapless_a, gapless_b,
               exact_only, match_case);

  // Aligner state and the buffer that receives each hit
  sw_aligner_t *aligner = smith_waterman_new();
  alignment_t *hit = alignment_create(input_len+1);

  // Self-alignment: the same string is passed as both sequences
  smith_waterman_align(input, input, &scheme, aligner);

  while(smith_waterman_fetch(aligner, hit))
  {
    // Only report hits that start earlier in copy A than in copy B
    if(hit->pos_a >= hit->pos_b)
    {
      continue;
    }

    fputs(hit->result_a, stdout);
    printf(" [%zu,%zu]\n", hit->pos_a, hit->pos_b);
  }

  smith_waterman_free(aligner);
  alignment_free(hit);

  return EXIT_SUCCESS;
}
コード例 #4
0
ファイル: sw_cmdline.c プロジェクト: DanielTillett/seq-align
/*
 * Align two sequences against each other to find local alignments between
 * them, and print every reported hit to stdout.
 *
 * seq_a, seq_b            NUL-terminated sequences. If either is empty an
 *                         error is written to stderr and the function
 *                         returns without aligning.
 * seq_a_name, seq_b_name  Optional names (may be NULL), printed when
 *                         cmd->print_fasta is set. Supplying a name while
 *                         wait_on_keystroke is set is an error and exits
 *                         the process.
 *
 * Relies on file-level state: cmd, scoring, sw, result, wait_on_keystroke
 * and alignment_index. May assign cmd->min_score when it has not been set,
 * and increments alignment_index once the alignment has been printed.
 */
void align(const char *seq_a, const char *seq_b,
           const char *seq_a_name, const char *seq_b_name)
{
  if((seq_a_name != NULL || seq_b_name != NULL) && wait_on_keystroke)
  {
    // Only one of the names is guaranteed non-NULL here, and passing NULL
    // to %s is undefined, so substitute a placeholder
    fprintf(stderr, "Error: Interactive input takes seq only "
                    "(no FASTA/FASTQ) '%s:%s'\n",
            seq_a_name != NULL ? seq_a_name : "(null)",
            seq_b_name != NULL ? seq_b_name : "(null)");
    fflush(stderr);
    exit(EXIT_FAILURE);
  }

  // Check both arguments have length > 0
  if(seq_a[0] == '\0' || seq_b[0] == '\0')
  {
    fprintf(stderr, "Error: Sequences must have length > 0\n");

    if(cmd->print_fasta && seq_a_name != NULL && seq_b_name != NULL)
    {
      fprintf(stderr, "%s\n%s\n", seq_a_name, seq_b_name);
    }

    fflush(stderr);

    return;
  }

  smith_waterman_align(seq_a, seq_b, &scoring, sw);

  // The score matrix is one larger than the sequence in each dimension
  aligner_t *aligner = smith_waterman_get_aligner(sw);
  size_t len_a = aligner->score_width-1, len_b = aligner->score_height-1;

  // len_a / len_b are size_t, so they must be printed with %zu
  printf("== Alignment %zu lengths (%zu, %zu):\n", alignment_index, len_a, len_b);

  if(cmd->print_matrices)
  {
    alignment_print_matrices(aligner);
  }

  // seqA
  if(cmd->print_fasta && seq_a_name != NULL)
  {
    fputs(seq_a_name, stdout);
    putc('\n', stdout);
  }

  if(cmd->print_seq)
  {
    fputs(seq_a, stdout);
    putc('\n', stdout);
  }

  // seqB
  if(cmd->print_fasta && seq_b_name != NULL)
  {
    fputs(seq_b_name, stdout);
    putc('\n', stdout);
  }

  if(cmd->print_seq)
  {
    fputs(seq_b, stdout);
    putc('\n', stdout);
  }

  putc('\n', stdout);

  if(!cmd->min_score_set)
  {
    // If min_score hasn't been set, set a limit based on the lengths of seqs
    // (match score times 20% of the shorter length, with a floor of 2),
    // or zero if we're running interactively
    cmd->min_score = wait_on_keystroke ? 0
                       : scoring.match * MAX2(0.2 * MIN2(len_a, len_b), 2);

    #ifdef SEQ_ALIGN_VERBOSE
    printf("min_score: %i\n", cmd->min_score);
    #endif
  }

  fflush(stdout);

  size_t hit_index = 0;

  // For print context; all stay zero when cmd->print_context is off
  size_t context_left = 0, context_right = 0;
  size_t left_spaces_a = 0, left_spaces_b = 0;
  size_t right_spaces_a = 0, right_spaces_b = 0;

  // Keep fetching hits until one is unavailable, scores below the minimum,
  // or the optional per-alignment hit limit has been reached
  while(get_next_hit() &&
        smith_waterman_fetch(sw, result) && result->score >= cmd->min_score &&
        (!cmd->max_hits_per_alignment_set ||
         hit_index < cmd->max_hits_per_alignment))
  {
    printf("hit %zu.%zu score: %i\n", alignment_index, hit_index++, result->score);

    if(cmd->print_context)
    {
      // Calculate number of characters of context to print either side,
      // capped at cmd->print_context
      context_left = MAX2(result->pos_a, result->pos_b);
      context_left = MIN2(context_left, cmd->print_context);

      // Sequence characters to the right of the hit in each sequence
      size_t rem_a = len_a - (result->pos_a + result->len_a);
      size_t rem_b = len_b - (result->pos_b + result->len_b);

      context_right = MAX2(rem_a, rem_b);
      context_right = MIN2(context_right, cmd->print_context);

      // Where a sequence has less context than the other, pad with spaces
      left_spaces_a = (context_left > result->pos_a)
                      ? context_left - result->pos_a : 0;

      left_spaces_b = (context_left > result->pos_b)
                      ? context_left - result->pos_b : 0;

      right_spaces_a = (context_right > rem_a) ? context_right - rem_a : 0;
      right_spaces_b = (context_right > rem_b) ? context_right - rem_b : 0;
    }

    #ifdef SEQ_ALIGN_VERBOSE
    printf("context left = %zu; right = %zu spacing: [%zu,%zu] [%zu,%zu]\n",
           context_left, context_right,
           left_spaces_a, right_spaces_a,
           left_spaces_b, right_spaces_b);
    #endif

    // seq a
    print_alignment_part(result->result_a, result->result_b,
                         result->pos_a, result->len_a,
                         seq_a,
                         left_spaces_a, right_spaces_a,
                         context_left-left_spaces_a,
                         context_right-right_spaces_a);

    if(cmd->print_pretty)
    {
      fputs("  ", stdout);

      size_t max_left_spaces = MAX2(left_spaces_a, left_spaces_b);
      size_t max_right_spaces = MAX2(right_spaces_a, right_spaces_b);
      size_t spacer;

      // Print spaces for lefthand spacing
      for(spacer = 0; spacer < max_left_spaces; spacer++)
      {
        putc(' ', stdout);
      }

      // Print dots for lefthand context sequence
      for(spacer = 0; spacer < context_left-max_left_spaces; spacer++)
      {
        putc('.', stdout);
      }

      alignment_print_spacer(result->result_a, result->result_b, &scoring);

      // Print dots for righthand context sequence
      for(spacer = 0; spacer < context_right-max_right_spaces; spacer++)
      {
        putc('.', stdout);
      }

      // Print spaces for righthand spacing
      for(spacer = 0; spacer < max_right_spaces; spacer++)
      {
        putc(' ', stdout);
      }

      putc('\n', stdout);
    }

    // seq b
    print_alignment_part(result->result_b, result->result_a,
                         result->pos_b, result->len_b,
                         seq_b,
                         left_spaces_b, right_spaces_b,
                         context_left-left_spaces_b,
                         context_right-right_spaces_b);

    printf("\n");

    // Flush output here
    fflush(stdout);
  }

  fputs("==\n", stdout);
  fflush(stdout);

  // Increment sequence alignment counter
  alignment_index++;
}