/* Scan the non-reference sequences of an alignment for in-frame stop
   codons within a feature.  For every sequence other than the first
   (index 0, the reference), the non-gap characters spanning the feature
   are collected, reverse complemented if the feature is on the '-'
   strand, and then read three characters at a time starting at the
   offset (3 - feat->frame) % 3.  At the first stop codon encountered a
   NONSENSE problem is added to 'problems' and 0 is returned; 1 is
   returned if no sequence contains a stop codon. */
int is_nonsense_clean(GFF_Feature *feat, MSA *msa, List *problems) {
  int col, pos, s, nbases;
  char bases[feat->end - feat->start + 2]; /* feature length + terminator */
  int first_codon = (3 - feat->frame) % 3;

  for (s = 1; s < msa->nseqs; s++) { /* index 0 is the reference; skip it */
    /* gather the ungapped version of this sequence */
    nbases = 0;
    for (col = feat->start - 1; col < feat->end; col++) {
      if (ss_get_char_pos(msa, col, s, 0) == GAP_CHAR)
        continue;
      bases[nbases] = ss_get_char_pos(msa, col, s, 0);
      nbases++;
    }
    bases[nbases] = '\0';

    if (feat->strand == '-')
      msa_reverse_compl_seq(bases, nbases);

    /* walk the codons in frame */
    for (pos = first_codon; pos + 3 <= nbases; pos += 3) {
      int codon_start;

      if (!is_stop_codon(&bases[pos]))
        continue;

      /* position is derived from the offset in the ungapped sequence;
         for anything other than '+' it is counted back from the
         feature end */
      codon_start = (feat->strand == '+') ? feat->start + pos
                                          : feat->end - pos - 2;

      problem_add(problems, feat, NONSENSE, codon_start, codon_start + 2);
      return 0;
    }
  }

  return 1;
}
/* Look for frame-shift gaps using a slightly more sophisticated
   algorithm, which allows for compensatory indels.  The strategy here
   is to identify maximal gapless blocks of at least
   MIN_GAPLESS_BLOCK_SIZE sites, then to make sure that in the gappy
   portions between them, each sequence has a total number of gaps that
   equals the total number for the reference sequence (index 0), modulo
   3.  A gappy region longer than MAX_GAPPY_BLOCK_SIZE columns is also
   rejected.  Returns 1 if all gaps look okay (no net frame shift) and 0
   otherwise.

   feat  feature whose columns [feat->start - 1, feat->end) are scanned
   msa   alignment the feature coordinates refer to */
int is_fshift_okay(GFF_Feature *feat, MSA *msa) {
  int *ngaps = smalloc(msa->nseqs * sizeof(int));
  int i, j, blk_beg, blk_end, start_gappy_reg;

  for (j = 0; j < msa->nseqs; j++) ngaps[j] = 0;

  /* The first gappy region begins at the first column of the feature.
     All other positions used below (blk_beg, blk_end) are absolute
     alignment columns, so this must be one too; starting from 0 would
     measure the first region from the start of the alignment and
     spuriously trip the MAX_GAPPY_BLOCK_SIZE test. */
  start_gappy_reg = feat->start - 1;

  for (i = feat->start - 1; i < feat->end; ) {
    /* find next gapless column, simultaneously keeping track of the
       number of gaps encountered in each sequence */
    for (; i < feat->end; i++) {
      int gapless_col = 1;  
      for (j = 0; j < msa->nseqs; j++) {
        if (ss_get_char_pos(msa, i, j, 0) == GAP_CHAR) {
          ngaps[j]++;
          gapless_col = 0;
        }
      }
      if (gapless_col == 1)
        break;     
    }

    blk_beg = i;                /* inclusive; equals feat->end if the
                                   feature ended inside a gappy region */

    /* find next col with gap (its gaps are counted when the outer loop
       comes back around to the scan above) */
    for (i++; i < feat->end; i++) {
      for (j = 0; 
           j < msa->nseqs && ss_get_char_pos(msa, i, j, 0) != GAP_CHAR; 
           j++);
      if (j != msa->nseqs) break;
    }
    blk_end = i;                /* exclusive */

    if (blk_end - blk_beg >= MIN_GAPLESS_BLOCK_SIZE ||
        blk_beg == feat->end || /* gaps at end of aln */
        blk_end == feat->end) { /* short block at end of aln */
      /* check total number of gaps since last retained block or
         beginning of feature; must be same as reference sequence,
         mod 3 */
      for (j = 0; j < msa->nseqs && ngaps[j] % 3 == ngaps[0] % 3; j++);
      /* reject alignment if mod 3 test fails OR if the total length
         of the gappy region exceeds MAX_GAPPY_BLOCK_SIZE */
      if (j != msa->nseqs || blk_beg - start_gappy_reg > MAX_GAPPY_BLOCK_SIZE) {
        sfree(ngaps);
        return 0;
      }

      /* reset ngaps (note: done only if block is retained; short
         interior blocks are absorbed into the surrounding gappy
         region) */
      for (j = 0; j < msa->nseqs; j++) ngaps[j] = 0;
      start_gappy_reg = blk_end;
    }
  }
  sfree(ngaps);
  return 1;
}
/* Checks to see if the reference sequence (index 0 of the alignment)
   looks okay with respect to a given list of features.  For each
   feature the ungapped reference bases are extracted (and reverse
   complemented for '-' strand features) and checked according to the
   feature type: start codons must read "ATG", stop codons must have
   frame 0 and be a stop codon, splice sites must satisfy
   is_valid_5splice / is_valid_3splice, and CDS features must contain
   no in-frame stop codon.  If indel_strict is nonzero, additional
   gap-related checks are applied to features whose type appears in
   SIGNALS, and to short CDS features.

   features       list of GFF_Feature pointers
   msa            alignment the feature coordinates refer to
   offset3        offset into the extracted 3' splice sequence that is
                  handed to is_valid_3splice
   indel_strict   enable the stricter gap checks
   splice_strict  passed through to the splice-site validators
   problems       list to which a problem is added for each failure

   Returns TRUE if no problem was found and FALSE otherwise; all
   features are examined even after a failure. */
int ref_seq_okay(List *features, MSA *msa, int offset3, 
                 int indel_strict, int splice_strict, List *problems) {
  List *signals = NULL;
  char *seq = NULL;
  int seqalloc = 0;
  int idx, retval = TRUE;
  GFF_Feature *feat, *lastfeat_helper = NULL;

  /* build the list of signal type names once, not once per feature */
  if (indel_strict) {
    signals = lst_new_ptr(10);
    /* NOTE(review): the temporary String made by str_new_charstr is
       not released here; confirm whether str_split takes ownership */
    str_split(str_new_charstr(SIGNALS), ",", signals);
  }

  for (idx = 0; idx < lst_size(features); idx++) {
    int i, j, len, needed, has_gaps = 0; 

    feat = lst_get_ptr(features, idx);

    /* room for every column of the feature plus the terminator; never
       less than one byte so that the terminator always fits */
    needed = feat->end - feat->start + 2;
    if (needed < 1) needed = 1;
    if (seqalloc < needed) {
      seqalloc = needed * 2;    /* grow geometrically */
      seq = srealloc(seq, seqalloc * sizeof(char));
    }

    /* copy reference sequence without gaps, noting whether any gap
       was seen */
    for (i = feat->start - 1, len = 0; i < feat->end; i++) {
      if (ss_get_char_pos(msa, i, 0, 0) != GAP_CHAR)
        seq[len++] = ss_get_char_pos(msa, i, 0, 0);
      else if (!has_gaps) has_gaps = 1;
    }
    seq[len] = '\0';
    if (feat->strand == '-') msa_reverse_compl_seq(seq, len);

    if (str_equals_charstr(feat->feature, GFF_START_TYPE) && strcmp(seq, "ATG") != 0) {
      problem_add(problems, feat, BAD_REF_START, -1, -1);
      retval = FALSE;
    }
    else if (str_equals_charstr(feat->feature, GFF_STOP_TYPE) && 
             (feat->frame != 0 || !is_stop_codon(seq))) {
      problem_add(problems, feat, BAD_REF_STOP, -1, -1);
      retval = FALSE;
    }
    else if (str_starts_with_charstr(feat->feature, SPLICE_5) && 
             !is_valid_5splice(seq, splice_strict)) {
      problem_add(problems, feat, BAD_REF_5_SPLICE, -1, -1);
      retval = FALSE;
    }
    else if (str_starts_with_charstr(feat->feature, SPLICE_3) &&
             /* an offset outside the extracted sequence cannot point
                at a valid site; do not read past the buffer */
             (offset3 < 0 || offset3 > len ||
              !is_valid_3splice(&seq[offset3], splice_strict))) {
      problem_add(problems, feat, BAD_REF_3_SPLICE, -1, -1);
      retval = FALSE;
    }
    else if (str_equals_charstr(feat->feature, GFF_CDS_TYPE)) {
      for (i = (3 - feat->frame) % 3; i <= len - 3; i += 3) {
        if (is_stop_codon(&seq[i])) {
          problem_add(problems, feat, BAD_REF_ORF, -1, -1);
          retval = FALSE;
          break;
        }
      }
    }

    if (indel_strict) {
      int strict_okay = TRUE;

      if (str_in_list(feat->feature, signals)) {
        /* reject any signal feature with gaps in the ref seq, unless they
           appear in a non-critical part of a splice site or in a
           "prestart" feature  */
        if (has_gaps) {          
          if (str_starts_with_charstr(feat->feature, SPLICE_5)) {
            if (ss_get_char_pos(msa, feat->start-1, 0, 0) == GAP_CHAR ||
                ss_get_char_pos(msa, feat->start, 0, 0) == GAP_CHAR)
              strict_okay = FALSE;
          }
          else if (str_starts_with_charstr(feat->feature, SPLICE_3)) {
            if (ss_get_char_pos(msa, feat->end-1, 0, 0) == GAP_CHAR ||
                ss_get_char_pos(msa, feat->end-2, 0, 0) == GAP_CHAR)
              strict_okay = FALSE;
          }
          else if (!str_equals_charstr(feat->feature, "prestart"))
            strict_okay = FALSE;
        }
        /* in addition, if two signals occur consec. with gaps and
           only gaps between them, assume a violation of
           --indel-strict */
        if (lastfeat_helper != NULL && lastfeat_helper->end < feat->start-1) {
          int allgaps = 1;
          for (j = lastfeat_helper->end; allgaps && j < feat->start-1; j++) 
                                /* note indexing: -1+1 for end and -1
                                   for start  */
            if (ss_get_char_pos(msa, j, 0, 0) != GAP_CHAR) allgaps = 0;
          if (allgaps) 
            strict_okay = FALSE;
        }
        lastfeat_helper = feat;
      }
      else lastfeat_helper = NULL;
    
      /* also exclude CDS exons of ungapped length 6 or less in the
         indel_strict case -- these cause problems in exoniphy training
         because start_codon is adjacent to cds5ss */
      if (str_equals_charstr(feat->feature, GFF_CDS_TYPE) && len <= 6)
        strict_okay = FALSE;

      if (!strict_okay) {
        problem_add(problems, feat, BAD_REF_INDEL_STRICT_FAIL, -1, -1);
        retval = FALSE;
      }
    }
  }

  if (signals != NULL) {
    lst_free_strings(signals);
    lst_free(signals);
  }
  if (seq != NULL) sfree(seq);
  return retval;
}
/* Scans a cds for gaps, one sequence at a time.  Every maximal run of
   gap characters inside the feature is measured; a run whose length is
   not a multiple of three stops the scan, stores a new FSHIFT problem
   in *problem, and yields FSHIFT_BAD.  Otherwise the result is NGAPS
   (no gaps at all), NOVRLP_CLN_GAPS (gaps exist, none lies within 3
   sites of a feature boundary, and any two gaps that intersect are
   identical), or CLN_GAPS (all other clean cases).  No attempt is made
   to check for compensatory indels, which is more complicated and is
   handled by a separate special-purpose function.  *problem is written
   only in the FSHIFT_BAD case. */
int scan_for_gaps(GFF_Feature *feat, MSA *msa, Problem **problem) {
  const int first_col = feat->start - 1;
  const int last_col = feat->end - 1;
  int col, seq_idx, k;
  int touches_boundary = 0;
  cds_gap_type status = NGAPS;
  List *gap_list = lst_new_ptr(10);

  for (seq_idx = 0; seq_idx < msa->nseqs && status != FSHIFT_BAD; seq_idx++) {
    for (col = first_col; col <= last_col; col++) {
      int run_beg, run_end;
      struct gap *rec;

      if (ss_get_char_pos(msa, col, seq_idx, 0) != GAP_CHAR)
        continue;

      /* extend the run leftwards and rightwards; both ends inclusive */
      run_beg = col;
      while (run_beg - 1 >= first_col &&
             ss_get_char_pos(msa, run_beg - 1, seq_idx, 0) == GAP_CHAR)
        run_beg--;

      run_end = col;
      while (run_end + 1 <= last_col &&
             ss_get_char_pos(msa, run_end + 1, seq_idx, 0) == GAP_CHAR)
        run_end++;

      if ((run_end - run_beg + 1) % 3 != 0) {
        /* frame-shifting gap: report it and give up */
        status = FSHIFT_BAD;
        *problem = problem_new(feat, FSHIFT, run_beg, run_end);
        (*problem)->cds_gap = FSHIFT_BAD;
        break;
      }

      /* remember whether any gap comes within 3 sites of either end */
      if (run_beg <= first_col + 3 || run_end >= last_col - 3)
        touches_boundary = 1;

      if (status == NGAPS)
        status = CLN_GAPS;

      rec = smalloc(sizeof(struct gap));
      rec->start = run_beg;
      rec->end = run_end;
      lst_push_ptr(gap_list, rec);

      col = run_end;            /* resume just past this run */
    }
  }

  if (status == CLN_GAPS) {
    /* sort, then compare neighbours: intersecting gaps are tolerated
       only when they are identical */
    int overlap = 0;

    lst_qsort(gap_list, gap_compare);
    for (k = 1; !overlap && k < lst_size(gap_list); k++) {
      struct gap *prev = lst_get_ptr(gap_list, k - 1);
      struct gap *cur = lst_get_ptr(gap_list, k);
      int same = (cur->start == prev->start && cur->end == prev->end);

      if (cur->start <= prev->end && !same)
        overlap = 1;
    }

    /* the boundary criterion is deliberately folded into the overlap
       criterion here, as in the original design */
    if (!overlap && !touches_boundary)
      status = NOVRLP_CLN_GAPS;
  }

  for (k = 0; k < lst_size(gap_list); k++)
    sfree(lst_get_ptr(gap_list, k));
  lst_free(gap_list);

  return status;
}
/* Example #5 */
int main(int argc, char *argv[]) {
    char c;
    List *l;
    int i, j, strand, bed_output = 0, backgd_nmods = -1, feat_nmods = -1,
                      winsize = -1, verbose = 0, max_nmods, memblocksize, old_nleaves,
                      refidx = 1, base_by_base = FALSE, windowWig = FALSE;
    TreeModel **backgd_mods = NULL, **feat_mods = NULL;
    HMM *backgd_hmm = NULL, *feat_hmm = NULL;
    msa_format_type inform = UNKNOWN_FORMAT;
    GFF_Set *features = NULL;
    MSA *msa, *msa_compl=NULL;
    double **backgd_emissions, **feat_emissions, **mem, **dummy_emissions,
           *winscore_pos=NULL, *winscore_neg=NULL;
    int *no_alignment=NULL;
    List *pruned_names;
    char *msa_fname;
    FILE *infile;

    int opt_idx;
    struct option long_opts[] = {
        {"background-mods", 1, 0, 'b'},
        {"background-hmm", 1, 0, 'B'},
        {"feature-mods", 1, 0, 'f'},
        {"feature-hmm", 1, 0, 'F'},
        {"features", 1, 0, 'g'},
        {"window", 1, 0, 'w'},
        {"window-wig", 1, 0, 'W'},
        {"base-by-base", 0, 0, 'y'},
        {"msa-format", 1, 0, 'i'},
        {"refidx", 1, 0, 'r'},
        {"output-bed", 0, 0, 'd'},
        {"verbose", 0, 0, 'v'},
        {"help", 0, 0, 'h'},
        {0, 0, 0, 0}
    };

    while ((c = getopt_long(argc, argv, "B:b:F:f:r:g:w:W:i:ydvh", long_opts, &opt_idx)) != -1) {
        switch (c) {
        case 'B':
            backgd_hmm = hmm_new_from_file(phast_fopen(optarg, "r"));
            break;
        case 'b':
            l = get_arg_list(optarg);
            backgd_nmods = lst_size(l);
            backgd_mods = smalloc(backgd_nmods * sizeof(void*));
            for (i = 0; i < backgd_nmods; i++)
                backgd_mods[i] = tm_new_from_file(phast_fopen(((String*)lst_get_ptr(l, i))->chars, "r"), 1);
            lst_free_strings(l);
            lst_free(l);
            break;
        case 'F':
            feat_hmm = hmm_new_from_file(phast_fopen(optarg, "r"));
            break;
        case 'f':
            l = get_arg_list(optarg);
            feat_nmods = lst_size(l);
            feat_mods = smalloc(feat_nmods * sizeof(void*));
            for (i = 0; i < feat_nmods; i++)
                feat_mods[i] = tm_new_from_file(phast_fopen(((String*)lst_get_ptr(l, i))->chars, "r"), 1);
            lst_free_strings(l);
            lst_free(l);
            break;
        case 'g':
            features = gff_read_set(phast_fopen(optarg, "r"));
            break;
        case 'w':
            winsize = get_arg_int(optarg);
            if (winsize <= 0) die("ERROR: window size must be positive.\n");
            break;
        case 'W':
            winsize = get_arg_int(optarg);
            if (winsize <= 0) die("ERROR: window size must be positive.\n");
            windowWig = TRUE;
            break;
        case 'y':
            base_by_base = TRUE;
            break;
        case 'i':
            inform = msa_str_to_format(optarg);
            if (inform == UNKNOWN_FORMAT) die("Bad argument to -i.\n");
            break;
        case 'r':
            refidx = get_arg_int_bounds(optarg, 0, INFTY);
            break;
        case 'd':
            bed_output = 1;
            break;
        case 'h':
            printf("%s", HELP);
            exit(0);
        case 'v':
            verbose = 1;
            break;
        case '?':
            die("Bad argument.  Try '%s -h'.\n", argv[0]);
        }
    }

    set_seed(-1);

    if (backgd_mods == NULL || feat_mods == NULL)
        die("ERROR: -b and -f required.  Try '%s -h'.\n", argv[0]);

    if (backgd_nmods == 1 && backgd_hmm == NULL)
        backgd_hmm = hmm_create_trivial();
    else if (backgd_hmm == NULL)
        die("ERROR: -B required.  Try '%s -h'.\n", argv[0]);

    if (feat_nmods == 1 && feat_hmm == NULL)
        feat_hmm = hmm_create_trivial();
    else if (feat_hmm == NULL)
        die("ERROR: -F required.  Try '%s -h'.\n", argv[0]);

    if ((winsize == -1 && features == NULL && !base_by_base) ||
            (winsize != -1 && features != NULL) ||
            (winsize != -1 && base_by_base) ||
            (features != NULL && base_by_base))
        die("ERROR: must specify exactly one of -g, -w, and -y.  Try '%s -h'.\n", argv[0]);

    if (backgd_hmm->nstates != backgd_nmods)
        die("ERROR: number of states must equal number of tree models for background.\n");

    if (feat_hmm->nstates != feat_nmods)
        die("ERROR: number of states must equal number of tree models for features.\n");

    if (features != NULL && lst_size(features->features) == 0)
        die("ERROR: empty features file.\n");

    if (base_by_base && (backgd_nmods > 1 || feat_nmods > 1))
        die("ERROR: only single phylogenetic models (not HMMs) are supported with --base-by-base.\n");

    if (optind != argc - 1)
        die("ERROR: too few arguments.  Try '%s -h'.\n", argv[0]);

    if (verbose) fprintf(stderr, "Reading alignment ...\n");
    msa_fname = argv[optind];
    infile = phast_fopen(msa_fname, "r");
    if (inform == UNKNOWN_FORMAT)
        inform = msa_format_for_content(infile, 1);
    if (inform == MAF)
        msa = maf_read(infile, NULL, 1, NULL, NULL,
                       NULL, -1, TRUE, NULL, NO_STRIP, FALSE);
    else
        msa = msa_new_from_file_define_format(infile, inform, NULL);
    if (msa_alph_has_lowercase(msa)) msa_toupper(msa);
    msa_remove_N_from_alph(msa);

    /* need ordered representation of alignment */
    if (msa->seqs == NULL && (msa->ss == NULL || msa->ss->tuple_idx == NULL) )
        die("ERROR: ordered sufficient statistics are required.\n");

    pruned_names = lst_new_ptr(msa->nseqs);
    for (i = 0; i < backgd_nmods; i++) {
        old_nleaves = (backgd_mods[i]->tree->nnodes + 1) / 2;
        tm_prune(backgd_mods[i], msa, pruned_names);
        if (lst_size(pruned_names) >= old_nleaves)
            die("ERROR: no match for leaves of tree in alignment (background model #%d)\n", i+1);
        else if (lst_size(pruned_names) > 0) {
            fprintf(stderr, "WARNING: pruned away leaves in background model (#%d) with no match in alignment (", i+1);
            for (j = 0; j < lst_size(pruned_names); j++)
                fprintf(stderr, "%s%s", ((String*)lst_get_ptr(pruned_names, j))->chars,
                        j < lst_size(pruned_names) - 1 ? ", " : ").\n");
        }
        lst_free_strings(pruned_names);
    }
    for (i = 0; i < feat_nmods; i++) {
        old_nleaves = (feat_mods[i]->tree->nnodes + 1) / 2;
        tm_prune(feat_mods[i], msa, pruned_names);
        if (lst_size(pruned_names) >= old_nleaves)
            die("ERROR: no match for leaves of tree in alignment (features model #%d)\n", i+1);
        else if (lst_size(pruned_names) > 0) {
            fprintf(stderr, "WARNING: pruned away leaves in features model (#%d) with no match in alignment (", i+1);
            for (j = 0; j < lst_size(pruned_names); j++)
                fprintf(stderr, "%s%s", ((String*)lst_get_ptr(pruned_names, j))->chars,
                        j < lst_size(pruned_names) - 1 ? ", " : ").\n");
        }
        lst_free_strings(pruned_names);
    }
    lst_free(pruned_names);

    /* first have to subtract offset from features, if necessary */
    if (msa->idx_offset != 0 && features != NULL) {
        for (i = 0; i < lst_size(features->features); i++) {
            GFF_Feature *f = lst_get_ptr(features->features, i);
            f->start -= msa->idx_offset;
            f->end -= msa->idx_offset;
        }
    }

    /* convert to coord frame of alignment */
    if (features != NULL && refidx != 0) {
        if (verbose) fprintf(stderr, "Mapping coordinates ...\n");
        msa_map_gff_coords(msa, features, refidx, 0, 0);
        if (lst_size(features->features) == 0)
            die("ERROR: no features within coordinate range of alignment.\n");
    }

    /* Make a reverse complemented copy of the alignment.  The two
       strands will be processed separately, to avoid problems with
       overlapping features, etc. */
    if (!base_by_base) {          /* skip in base by base case */
        if (verbose) fprintf(stderr, "Creating reverse complemented alignment ...\n");
        msa_compl = msa_create_copy(msa, 0);
        /* temporary workaround: make sure reverse complement not based on
           sufficient stats */
        if (msa_compl->seqs == NULL) ss_to_msa(msa_compl);
        if (msa_compl->ss != NULL) {
            ss_free(msa_compl->ss);
            msa_compl->ss = NULL;
        }
        msa_reverse_compl(msa_compl);
    }

    /* allocate memory for computing scores */
    backgd_emissions = smalloc(backgd_nmods * sizeof(void*));
    for (i = 0; i < backgd_nmods; i++)
        backgd_emissions[i] = smalloc(msa->length * sizeof(double));
    feat_emissions = smalloc(feat_nmods * sizeof(void*));
    for (i = 0; i < feat_nmods; i++)
        feat_emissions[i] = smalloc(msa->length * sizeof(double));
    max_nmods = max(backgd_nmods, feat_nmods);
    dummy_emissions = smalloc(max_nmods * sizeof(void*));
    mem = smalloc(max_nmods * sizeof(void*));
    /* memory for forward algorithm -- each block must be as large as
       the largest feature */
    if (features != NULL) {
        for (i = 0, memblocksize = -1; i < lst_size(features->features); i++) {
            GFF_Feature *f = lst_get_ptr(features->features, i);
            if (f->end - f->start + 1 > memblocksize)
                memblocksize = f->end - f->start + 1;
        }
    }
    else memblocksize = winsize;  /* -1 if base-by-base mode */

    if (memblocksize > 0)
        for (i = 0; i < max_nmods; i++)
            mem[i] = smalloc(memblocksize * sizeof(double));

    if (winsize != -1) {
        winscore_pos = smalloc(msa->length * sizeof(double));
        winscore_neg = smalloc(msa->length * sizeof(double));
        no_alignment = smalloc(msa->length * sizeof(int));

        for (i = 0; i < msa->length; i++) {
            winscore_pos[i] = winscore_neg[i] = NEGINFTY;
            if (refidx == 0)
                no_alignment[i] = FALSE;
            else
                no_alignment[i] = msa_missing_col(msa, refidx, i);
        }
    }

    /* the rest will be repeated for each strand */
    for (strand = 1; strand <= 2; strand++) {
        MSA *thismsa = strand == 1 ? msa : msa_compl;
        double *winscore = strand == 1 ? winscore_pos : winscore_neg;

        if (base_by_base && strand == 2) break; /* don't do second pass in
                                               base_by_base case */

        if (verbose) fprintf(stderr, "Processing %c strand ...\n",
                                 strand == 1 ? '+' : '-');

        /* set up dummy categories array, so that emissions are only
           computed where needed */
        thismsa->categories = smalloc(thismsa->length * sizeof(int));
        thismsa->ncats = 1;
        if (winsize != -1) {
            if (strand == 1)
                for (i = 0; i < thismsa->length; i++)
                    thismsa->categories[i] = no_alignment[i] ? 0 : 1;
            else
                for (i = 0; i < thismsa->length; i++)
                    thismsa->categories[i] = no_alignment[thismsa->length - i - 1] ? 0 : 1;
        }
        else if (features != NULL) {
            for (i = 0; i < thismsa->length; i++) thismsa->categories[i] = 0;
            for (i = 0; i < lst_size(features->features); i++) {
                GFF_Feature *f = lst_get_ptr(features->features, i);
                if (f->start <= 0 || f->end <= 0) {
                    fprintf(stderr, "WARNING: feature out of range ('");
                    gff_print_feat(stderr, f);
                    fprintf(stderr, "')\n");
                    continue;
                }

                if (strand == 1 && f->strand != '-')
                    for (j = f->start - 1; j < f->end; j++)
                        thismsa->categories[j] = 1;
                else if (strand == 2 && f->strand == '-')
                    for (j = thismsa->length - f->end;
                            j < thismsa->length - f->start + 1; j++)
                        thismsa->categories[j] = 1;
            }
        }
        else {                      /* base-by-base scores */
            for (i = 0; i < thismsa->length; i++) thismsa->categories[i] = 1;
        }
        if (thismsa->ss != NULL) ss_update_categories(thismsa);

        /* compute emissions */
        for (i = 0; i < backgd_nmods; i++) {
            if (verbose)
                fprintf(stderr, "Computing emissions for background model #%d ...\n", i+1);
            tl_compute_log_likelihood(backgd_mods[i], thismsa,
                                      backgd_emissions[i], NULL, 1, NULL);
        }
        for (i = 0; i < feat_nmods; i++) {
            if (verbose)
                fprintf(stderr, "Computing emissions for features model #%d ...\n", i+1);
            tl_compute_log_likelihood(feat_mods[i], thismsa,
                                      feat_emissions[i], NULL, 1, NULL);
        }

        /* now compute scores */
        if (winsize != -1) {        /* windows case */
            int winstart;
            if (verbose) fprintf(stderr, "Computing scores ...\n");

            for (winstart = 0; winstart <= thismsa->length - winsize; winstart++) {
                int centeridx = winstart + winsize/2;

                if (strand == 2) centeridx = thismsa->length - centeridx - 1;

                if (no_alignment[centeridx]) continue;

                for (j = 0; j < feat_nmods; j++)
                    dummy_emissions[j] = &(feat_emissions[j][winstart]);
                winscore[centeridx] = hmm_forward(feat_hmm, dummy_emissions,
                                                  winsize, mem);

                if (winscore[centeridx] <= NEGINFTY) {
                    winscore[centeridx] = NEGINFTY;
                    continue;
                }

                for (j = 0; j < backgd_nmods; j++)
                    dummy_emissions[j] = &(backgd_emissions[j][winstart]);
                winscore[centeridx] -= hmm_forward(backgd_hmm, dummy_emissions,
                                                   winsize, mem);

                if (winscore[centeridx] < NEGINFTY) winscore[centeridx] = NEGINFTY;
            }
        }
        else if (features != NULL) { /* features case */
            if (verbose) fprintf(stderr, "Computing scores ...\n");
            for (i = 0; i < lst_size(features->features); i++) {
                GFF_Feature *f = lst_get_ptr(features->features, i);
                int s, e;

                if ((strand == 1 && f->strand == '-') ||
                        (strand == 2 && f->strand != '-') ||
                        f->start <= 0 || f->end <= 0 || f->end - f->start < 0)
                    continue;

                /* effective coords */
                if (f->strand == '-') {
                    s = thismsa->length - f->end + 1;
                    e = thismsa->length - f->start + 1;
                }
                else {
                    s = f->start;
                    e = f->end;
                }

                f->score_is_null = 0;

                for (j = 0; j < feat_nmods; j++)
                    dummy_emissions[j] = &(feat_emissions[j][s-1]);
                f->score = hmm_forward(feat_hmm, dummy_emissions, e - s + 1, mem);

                if (f->score <= NEGINFTY) {
                    f->score = NEGINFTY;
                    continue;
                }

                for (j = 0; j < backgd_nmods; j++)
                    dummy_emissions[j] = &(backgd_emissions[j][s-1]);
                f->score -= hmm_forward(backgd_hmm, dummy_emissions, e - s + 1, mem);

                if (f->score < NEGINFTY) f->score = NEGINFTY;
            }
        }
    }

    if (verbose) fprintf(stderr, "Generating output ...\n");

    if (winsize != -1 && windowWig == FALSE) { /* standard windows output */
        for (i = 0, j = 0; i < msa->length; i++) {
            if (no_alignment[i] == FALSE)
                printf("%d\t%.3f\t%.3f\n", j + msa->idx_offset + 1, winscore_pos[i],
                       winscore_neg[i]);
            if (ss_get_char_pos(msa, i, 0, 0) != GAP_CHAR) j++;
        }
    }
    else if (windowWig == TRUE) { /* windows with wig output */
        int last = NEGINFTY;
        for (i = 0, j = 0; i < msa->length; i++) {
            if (refidx == 0 || msa_get_char(msa, refidx-1, i) != GAP_CHAR) {
                if (no_alignment[i] == FALSE && winscore_pos[i] > NEGINFTY) {
                    if (j > last + 1)
                        printf("fixedStep chrom=%s start=%d step=1\n",
                               refidx > 0 ? msa->names[refidx-1] : "alignment",
                               j + msa->idx_offset + 1);
                    printf("%.3f\n", winscore_pos[i]);
                    last = j;
                }
                j++;
            }
        }
    }
    else if (features != NULL) {  /* features output */
        /* return to coord frame of reference seq (also, replace offset) */
        if (refidx != 0)
            msa_map_gff_coords(msa, features, 0, refidx, msa->idx_offset);
        else if (msa->idx_offset != 0) {
            for (i = 0; i < lst_size(features->features); i++) {
                GFF_Feature *f = lst_get_ptr(features->features, i);
                f->start += msa->idx_offset;
                f->end += msa->idx_offset;
            }
        }

        if (bed_output)
            gff_print_bed(stdout, features, FALSE);
        else
            gff_print_set(stdout, features);
    }
    else {           /* base-by-base scores */
        /* in this case, we can just output the difference between the emissions */
        printf("fixedStep chrom=%s start=%d step=1\n",
               refidx > 0 ? msa->names[refidx-1] : "alignment",
               msa->idx_offset + 1);
        for (i = 0, j = 0; i < msa->length; i++) {
            if (refidx == 0 || msa_get_char(msa, refidx-1, i) != GAP_CHAR) {
                printf("%.3f\n", feat_emissions[0][i] - backgd_emissions[0][i]);
                j++;
            }
        }
    }

    if (verbose) fprintf(stderr, "\nDone.\n");

    return 0;
}