Exemple #1
0
/* Map a spooled state to an unspooled state, given the list of states
   that preceded it.  The last item in predecessors is assumed to be the
   most recently visited.

   Starting from the unspool node registered for spooled_state, the
   function repeatedly looks (from the most recent predecessor backwards)
   for a predecessor that equals the oldstate of one of the current
   node's children and steps into that child, until it reaches a node
   whose newstate is set.

   Returns the newstate of the node reached, or -1 (after writing a
   diagnostic to stderr) if the predecessors are exhausted without a
   match. */
int cm_get_unspooled_state(CategoryMap *cm, int spooled_state, 
                           List *predecessors) {
  UnspoolNode *n, *child;
  int p, pred_idx, i;

  pred_idx = lst_size(predecessors) - 1;
  n = cm->unspooler->spooled_to_unspooled[spooled_state];

  /* a newstate of -1 marks a node that has not been assigned an
     unspooled state, so keep descending */
  while (n->newstate == -1) {
    child = NULL;

    /* n == child is the "match found" signal: it only becomes true when
       n has just been advanced to a matching child */
    while (n != child && pred_idx >= 0) {
      p = lst_get_int(predecessors, pred_idx--);
      for (i = 0; n != child && i < lst_size(n->children); i++) {
        child = (UnspoolNode*)lst_get_ptr(n->children, i);
        if (child->oldstate == p) 
          n = child;
      }
    }

    if (n != child) {
      /* ran out of predecessors without finding a matching child */
      fprintf(stderr, "ERROR (cm_get_unspooled_state): no match for state %d preceded by state(s) ", spooled_state);
      for (i = 0; i < lst_size(predecessors); i++) 
        fprintf(stderr, "%d ", lst_get_int(predecessors, i));
      fprintf(stderr, "\n");
      return -1;
    }
  }
  return n->newstate;
}
Exemple #2
0
/* Translate a list of category names and/or numbers into a list of
   category names.  No new Strings are allocated: the returned list
   holds pointers to Strings obtained from the CategoryMap (through
   cm_get_feature) or, when no map is supplied, to the Strings of the
   provided list.  Numeric entries require a category map and must lie
   in [0, cm->ncats]; unknown names are fatal unless ignore_missing is
   nonzero. */
List *cm_get_category_str_list(CategoryMap *cm, List *names, int ignore_missing) {
  List *result = lst_new_ptr(lst_size(names));
  int idx, catno;

  for (idx = 0; idx < lst_size(names); idx++) {
    String *entry = lst_get_ptr(names, idx);

    if (str_as_int(entry, &catno) != 0) {
      /* entry is a name, not a number */
      if (cm == NULL) {
        /* no map available: hand back the string from the input list */
        lst_push_ptr(result, entry);
        continue;
      }

      catno = cm_get_category(cm, entry);
      /* category 0 is only acceptable if the name really is the name
         of category 0 (or the caller tolerates missing names) */
      if (!ignore_missing && catno == 0 &&
          !str_equals(entry, cm_get_feature(cm, 0))) {
        die("ERROR: illegal category name (\"%s\")\n", entry->chars);
      }

      /* prefer pointers into the category map */
      lst_push_ptr(result, cm_get_feature(cm, catno));
      continue;
    }

    /* entry is a category number */
    if (cm == NULL)
      die("ERROR: if categories are specified by number, a category map is required\n");
    if (cat_out_of_range_guard:
        0) {}
    if (catno < 0 || (cm != NULL && catno > cm->ncats)) 
      die("ERROR: category number %d is out of bounds.\n", catno);
    lst_push_ptr(result, cm_get_feature(cm, catno));
  }

  return result;
}
Exemple #3
0
/* Close every output file and free the bookkeeping structures.  Files
   whose handle is still stored in outfileList are closed first with
   mafBlock_close_outfile; files whose list entry is NULL (already
   closed) are then reopened in append mode and closed the same way so
   that they also get the #eof closer.  See the comment at get_outfile. */
void close_outfiles(List *outfileList, Hashtable *outfileHash) {
  List *names = hsh_keys(outfileHash);
  int nfiles = lst_size(names);
  int *closed = smalloc(nfiles*sizeof(int));
  int k;

  /* first pass: finish everything that is currently open */
  for (k = 0; k < nfiles; k++) {
    char *path = (char*)lst_get_ptr(names, k);
    int slot = hsh_get_int(outfileHash, path);
    FILE *fh = (FILE*)lst_get_ptr(outfileList, slot);

    closed[k] = (fh != NULL);
    if (closed[k])
      mafBlock_close_outfile(fh);
  }

  /* second pass: revisit the files that were not open above */
  for (k = 0; k < nfiles; k++) {
    if (!closed[k]) {
      char *path = (char*)lst_get_ptr(names, k);
      FILE *fh = phast_fopen(path, "a");
      mafBlock_close_outfile(fh);
    }
  }

  sfree(closed);
  lst_free(names);
  lst_free(outfileList);
  hsh_free(outfileHash);
}
Exemple #4
0
/* Attach a new leaf called lname to the root branch of tree t.  The
   root node keeps its identity: a new ancestral node takes over the
   root's two current children and becomes the root's left child, while
   the new leaf becomes the root's right child.  lgroup is stored in the
   leaf's dparent field. */
void tr_add_leaf_at_root(TreeNode *t, char *lname, int lgroup) {
  TreeNode *inner = tr_new_node();
  TreeNode *leaf = tr_new_node();

  /* set up the new leaf */
  strcpy(leaf->name, lname);
  leaf->dparent = lgroup;
  leaf->parent = t;

  /* the new ancestor adopts the root's former children and inherits
     the root's dparent */
  inner->dparent = t->dparent;
  inner->parent = t;
  inner->lchild = t->lchild;
  inner->rchild = t->rchild;
  inner->lchild->parent = inner;
  inner->rchild->parent = inner;

  /* rewire the root */
  t->lchild = inner;
  t->rchild = leaf;
  t->dparent = (lgroup == inner->dparent) ? lgroup : 0;

  /* register both nodes; ids are their positions in the nodes list
     (this circumvents the normal id assignment) */
  lst_push_ptr(t->nodes, inner);
  inner->id = lst_size(t->nodes) - 1;
  lst_push_ptr(t->nodes, leaf);
  leaf->id = lst_size(t->nodes) - 1;
  t->nnodes += 2;
}
/* Trim stop codons off the CDS features of a group where necessary.
   Both output lists are emptied first; every CDS whose end (plus
   strand) or start (minus strand) is moved by 3 is appended to
   ends_adjusted or starts_adjusted respectively, so the change can be
   reverted before data is output. */
void exclude_stops(GFF_FeatureGroup *group, List *starts_adjusted, 
                   List *ends_adjusted) {
  List *stop_feats = lst_new_ptr(1);
  List *members = group->features;
  int m, s;

  lst_clear(stop_feats);
  lst_clear(ends_adjusted);
  lst_clear(starts_adjusted);

  /* pass 1: collect the stop codon features (one is expected, but
     there may be more) */
  for (m = 0; m < lst_size(members); m++) {
    GFF_Feature *cand = lst_get_ptr(members, m);
    if (str_equals_charstr(cand->feature, GFF_STOP_TYPE))
      lst_push_ptr(stop_feats, cand);
  }

  /* pass 2: compare every CDS against every collected stop */
  for (m = 0; m < lst_size(members); m++) {
    GFF_Feature *cds = lst_get_ptr(members, m);
    if (!str_equals_charstr(cds->feature, GFF_CDS_TYPE))
      continue;

    for (s = 0; s < lst_size(stop_feats); s++) {
      GFF_Feature *stop = lst_get_ptr(stop_feats, s);

      if (cds->strand == '+') {
        /* plus strand: the stop sits at the end of the CDS */
        if (stop->strand == '+' && cds->end == stop->end) {
          cds->end -= 3;
          lst_push_ptr(ends_adjusted, cds);
        }
      }
      else if (cds->strand == '-') {
        /* minus strand: the stop sits at the start coordinate */
        if (stop->strand == '-' && cds->start == stop->start) {
          cds->start += 3;
          lst_push_ptr(starts_adjusted, cds);
        }
      }
    }
  }

  lst_free(stop_feats);
}
Exemple #6
0
/* Attach a new leaf called lname to an internal branch of tree t.  The
   branch is identified by the index (in t->nodes) of the node beneath
   it, which must not be the root.  A new ancestral node is spliced in
   above that node, with the old node as its left child and the new leaf
   as its right child.  lgroup is stored in the leaf's dparent field. */
void tr_add_leaf_internal(TreeNode *t, int branch, char *lname, int lgroup) {
  TreeNode *below = lst_get_ptr(t->nodes, branch);
  TreeNode *above, *joint, *leaf;

  if (below == t)
    die("ERROR tr_add_leaf_internal: oldnode == t\n");

  joint = tr_new_node();
  leaf = tr_new_node();

  strcpy(leaf->name, lname);
  leaf->dparent = lgroup;
  leaf->parent = joint;

  /* the new ancestor sits between the old node and its former parent */
  above = below->parent;
  joint->rchild = leaf;
  joint->lchild = below;
  joint->parent = above;

  /* replace the old node in whichever child slot it occupied */
  if (above->lchild == below)
    above->lchild = joint;
  else 
    above->rchild = joint;

  below->parent = joint;

  /* the new ancestor takes lgroup only when the old node carries the
     same positive value */
  if (lgroup > 0 && lgroup == below->dparent)
    joint->dparent = lgroup;

  /* register both nodes; ids are their positions in the nodes list
     (this circumvents the normal id assignment) */
  lst_push_ptr(t->nodes, joint);
  joint->id = lst_size(t->nodes) - 1;
  lst_push_ptr(t->nodes, leaf);
  leaf->id = lst_size(t->nodes) - 1;
  t->nnodes += 2;
}
Exemple #7
0
/* Flag every sub-block of the block whose first line type is not 'e'
   and pass the flags to mafBlock_remove_lines (which presumably drops
   the unflagged entries -- see its definition). */
void mafBlock_strip_eLines(MafBlock *block) {
  int nsub = lst_size(block->data);
  int *keep = smalloc(nsub*sizeof(int));
  int idx;

  for (idx = 0; idx < nsub; idx++) {
    MafSubBlock *sub = (MafSubBlock*)lst_get_ptr(block->data, idx);
    keep[idx] = (sub->lineType[0] != 'e');
  }

  mafBlock_remove_lines(block, keep);
  sfree(keep);
}
Exemple #8
0
/* Build a category map that has one category per distinct feature type
   found in a GFF_Set.  Category numbers follow the order in which the
   types first appear; category 0 is named BACKGD_CAT_NAME. */
CategoryMap* cm_new_from_features(GFF_Set *feats) {
  Hashtable *seen = hsh_new(10);
  List *distinct = lst_new_ptr(10);
  CategoryMap *map;
  int idx;

  /* pass over the features, remembering each type the first time it
     shows up */
  for (idx = 0; idx < lst_size(feats->features); idx++) {
    GFF_Feature *feat = lst_get_ptr(feats->features, idx);
    checkInterruptN(idx, 10000);
    if (hsh_get(seen, feat->feature->chars) != (void*)-1)
      continue;                 /* type already recorded */
    lst_push_ptr(distinct, feat->feature);
    hsh_put_int(seen, feat->feature->chars, 1);
  }
  hsh_free(seen);

  /* one single-category range per type, preceded by the background
     category at index 0 */
  map = cm_new(lst_size(distinct));
  for (idx = 0; idx <= map->ncats; idx++) {
    String *label;
    if (idx == 0)
      label = str_new_charstr(BACKGD_CAT_NAME);
    else
      label = str_dup(lst_get_ptr(distinct, idx-1));
    map->ranges[idx] = cm_new_category_range(label, idx, idx);
  }

  lst_free(distinct);
  return map;
}
Exemple #9
0
/* Build an Unspooler for nstates_spooled spooled states.

   conditioned_on must be an array of integer lists; specifically, the
   ith element must be the list of state numbers on which the ith
   state is conditioned.  A NULL or empty list means the state is not
   conditioned on anything.

   For every spooled state a tree of UnspoolNodes is built (its root is
   stored in spooled_to_unspooled[i]).  Each node whose oldstate has no
   conditioning states is assigned the next unspooled state number, and
   unspooled_to_spooled records the spooled state it came from.  The
   function dies if the same state is reached twice while expanding the
   tree of a single spooled state.

   Returns the newly allocated Unspooler. */
Unspooler *cm_create_unspooler(int nstates_spooled, List **conditioned_on) {
  UnspoolNode *n;
  int i, j;
  Stack *s;
  Unspooler *unsp;
  int *mark;
  int capacity;

  unsp = (Unspooler*)smalloc(sizeof(Unspooler));
  unsp->nstates_spooled = nstates_spooled;
  unsp->nstates_unspooled = 0;
  unsp->spooled_to_unspooled = 
    (UnspoolNode**)smalloc(nstates_spooled * sizeof(UnspoolNode*));
  /* initial size of the reverse map; it is doubled below when needed */
  capacity = nstates_spooled * nstates_spooled;
  unsp->unspooled_to_spooled = (int*)smalloc(capacity * sizeof(int));

  mark = (int*)smalloc(nstates_spooled * sizeof(int));
  s = stk_new_ptr(nstates_spooled);
  for (i = 0; i < nstates_spooled; i++) {
    /* erase marks (used to detect cycles) */
    for (j = 0; j < nstates_spooled; j++) mark[j] = 0;

    /* root of the tree for spooled state i; expand it using the stack */
    unsp->spooled_to_unspooled[i] = cm_new_unspool_node(i);
    stk_push_ptr(s, unsp->spooled_to_unspooled[i]);
    while ((n = (UnspoolNode*)stk_pop_ptr(s)) != NULL) {
      if (conditioned_on[n->oldstate] == NULL ||
          lst_size(conditioned_on[n->oldstate]) == 0) {
        /* nothing to condition on: this node gets the next unspooled
           state number */
        n->newstate = unsp->nstates_unspooled++;

        /* mapping to spooled space */
        if (n->newstate >= capacity) {
          capacity *= 2;
          unsp->unspooled_to_spooled = 
            (int*)srealloc(unsp->unspooled_to_spooled, 
                          capacity * sizeof(int));          
        }
        unsp->unspooled_to_spooled[n->newstate] = i;
      }
      else {
        /* one child per state this node's state is conditioned on */
        for (j = 0; j < lst_size(conditioned_on[n->oldstate]); j++) {
          int oldstate = lst_get_int(conditioned_on[n->oldstate], j);
          UnspoolNode *m;

          /* a state seen twice for the same i is reported as a cycle */
          if (mark[oldstate] == 1)
            die("ERROR: cycle in 'conditioned_on' dependencies.\n");
          mark[oldstate] = 1;

          m = cm_new_unspool_node(oldstate);
          lst_push_ptr(n->children, m);
          stk_push_ptr(s, m);
        }
      }
    }
  }
  stk_free(s);
  sfree(mark);
  return unsp;
}
Exemple #10
0
/* Print a MAF block to outfile: the "a" line, then every line of every
   sub-block ('s', 'e', 'i' or 'q'), then a blank line that marks the end
   of the block.  Nothing is printed when the block has no data or has
   been reduced to all gaps; otherwise gap columns are removed from the
   block before printing.

   If pretty_print is nonzero, the first 's' sequence is printed in full
   and every later 's' sequence shows '.' at each position where it
   agrees (ignoring case) with that first sequence. */
void mafBlock_print(FILE *outfile, MafBlock *block, int pretty_print) {
  int i, j, k, numSpace;
  int fieldSize[6];  //maximum # of characters in the first 6 fields of block
  MafSubBlock *sub;
  char firstChar, formatstr[1000];
  char *firstseq=NULL;

  //if processing has reduced the number of species with data to zero, or has
  //reduced the block to all gaps, don't print
  if (lst_size(block->data) == 0 ||
      mafBlock_all_gaps(block)) return;
  mafBlock_remove_gap_cols(block);
  mafBlock_get_fieldSizes(block, fieldSize);

  fprintf(outfile, "%s\n", block->aLine->chars);
  for (i=0; i<lst_size(block->data); i++) {
    sub = (MafSubBlock*)lst_get_ptr(block->data, i);
    for (j=0; j<sub->numLine; j++) {
      firstChar = sub->lineType[j];
      if (firstChar == 's' || firstChar == 'e') {
        //build a format string whose column widths come from fieldSize
        snprintf(formatstr, sizeof(formatstr),
                 "%%c %%-%is %%%ii %%%ii %%c %%%ii ",
                 fieldSize[1], fieldSize[2], fieldSize[3], fieldSize[5]);
        fprintf(outfile, formatstr, firstChar, sub->src->chars,
                sub->start, sub->size, sub->strand, sub->srcSize);
        if (firstChar == 's') {
          if (firstseq == NULL) {
            fprintf(outfile, "%s\n", sub->seq->chars);
            if (pretty_print) firstseq = sub->seq->chars;
          }
          else {
            //pretty print: dots where this sequence matches the first one
            for (k=0; k<block->seqlen; k++) {
              //tolower requires a value representable as unsigned char
              int same = tolower((unsigned char)sub->seq->chars[k]) ==
                         tolower((unsigned char)firstseq[k]);
              fputc(same ? '.' : sub->seq->chars[k], outfile);
            }
            //terminate the line; without this the next line would be
            //appended to the dotted sequence
            fputc('\n', outfile);
          }
        }
        else fprintf(outfile, "%c\n", sub->eStatus);
      } else if (firstChar=='i') {
        snprintf(formatstr, sizeof(formatstr), "i %%-%is %%c %%i %%c %%i",
                 fieldSize[1]);
        fprintf(outfile, formatstr, sub->src->chars,
                sub->iStatus[0], sub->iCount[0],
                sub->iStatus[1], sub->iCount[1]);
        fputc('\n', outfile);
      } else {
        if (firstChar != 'q')
          die("ERROR mafBlock_print: firstChar should be q, got %c\n", firstChar);
        snprintf(formatstr, sizeof(formatstr), "q %%-%is", fieldSize[1]);
        fprintf(outfile, formatstr, sub->src->chars);
        //pad so the quality string lines up with the sequence column
        numSpace = 6 + fieldSize[2] + fieldSize[3] + fieldSize[5];
        for (k=0; k<numSpace; k++) fputc(' ', outfile);
        fprintf(outfile, "%s\n", sub->quality->chars);
      }
    }
  }
  fputc('\n', outfile);  //blank line to mark end of block
}
Exemple #11
0
/* Write a CategoryMap to file F: the number of categories, one line per
   feature type of every category range (with the range's category
   numbers and, if present, the states it is conditioned on), and
   finally the labelling and fill precedence lists. */
void cm_print(CategoryMap *cm, FILE *F) {
  List *order;
  int cat_idx, t, c;

  fprintf(F, "NCATS = %d\n\n", cm->ncats);

  cat_idx = 1;
  while (cat_idx <= cm->ncats) {
    CategoryRange *range = cm->ranges[cat_idx];

    for (t = 0; t < lst_size(range->feature_types); t++) {
      String *type_name = (String*)lst_get_ptr(range->feature_types, t);

      fprintf(F, "%-15s %d", type_name->chars, range->start_cat_no);
      if (range->end_cat_no > range->start_cat_no)
        fprintf(F, "-%d", range->end_cat_no);

      if (cm->conditioned_on[cat_idx] != NULL) {
        /* tab, then the comma-separated conditioning states */
        fprintf(F, "\t");
        for (c = 0; c < lst_size(cm->conditioned_on[cat_idx]); c++)
          fprintf(F, "%d%s", lst_get_int(cm->conditioned_on[cat_idx], c),
                  c + 1 < lst_size(cm->conditioned_on[cat_idx]) ? "," : "");
      }
      fprintf(F, "\n");
    }

    /* jump past the whole range so it is visited only once */
    cat_idx = range->end_cat_no + 1;
  }

  /* reconstruct precedence lists: sort the category numbers with
     compare_prec, which reads the file-level prec pointer */
  order = lst_new_int(cm->ncats + 1);
  for (c = 0; c <= cm->ncats; c++) 
    lst_push_int(order, c);
  prec = cm->labelling_precedence;
  lst_qsort(order, compare_prec);
  fprintf(F, "\nLABELLING_PRECEDENCE = ");
  for (c = 0; c <= cm->ncats; c++) {
    int cat = lst_get_int(order, c);
    if (cm->labelling_precedence[cat] == -1)
      continue;                 /* -1 entries are not printed */
    fprintf(F, "%d%s", cat, c < cm->ncats ? "," : "");
  }
  fprintf(F, "\n");

  /* same again for the fill precedence */
  lst_clear(order);
  for (c = 0; c <= cm->ncats; c++) 
    lst_push_int(order, c);
  prec = cm->fill_precedence;
  lst_qsort(order, compare_prec);
  fprintf(F, "FILL_PRECEDENCE = ");
  for (c = 0; c <= cm->ncats; c++) {
    int cat = lst_get_int(order, c);
    if (cm->fill_precedence[cat] == -1)
      continue;
    fprintf(F, "%d%s", cat, c < cm->ncats ? "," : "");
  }
  fprintf(F, "\n");

  lst_free(order);
}
/* Put back the stop codons on CDS features of the group: a CDS found in
   ends_adjusted gets its end moved forward by 3, otherwise a CDS found
   in starts_adjusted gets its start moved back by 3.  Does nothing when
   both lists are empty. */
void restore_stops(GFF_FeatureGroup *group, List *starts_adjusted,
                   List *ends_adjusted) {
  int idx;

  if (lst_size(ends_adjusted) != 0 || lst_size(starts_adjusted) != 0) {
    for (idx = 0; idx < lst_size(group->features); idx++) {
      GFF_Feature *cds = lst_get_ptr(group->features, idx);

      if (!str_equals_charstr(cds->feature, GFF_CDS_TYPE))
        continue;

      if (lst_find_ptr(ends_adjusted, cds) != -1)
        cds->end += 3;
      else if (lst_find_ptr(starts_adjusted, cds) != -1)
        cds->start -= 3;
    }
  }
}
Exemple #13
0
/* Return an R numeric vector holding tr_distance_to_root for every node
   of the tree built from treeP, in the order given by tr_preorder. */
SEXP rph_tree_summary_depth(SEXP treeP) {
  TreeNode *tree = rph_tree_new(treeP);
  List *order = tr_preorder(tree);
  SEXP result = PROTECT(NEW_NUMERIC(lst_size(order)));
  double *depth = NUMERIC_POINTER(result);
  int k = 0;

  while (k < lst_size(order)) {
    TreeNode *cur = (TreeNode*)lst_get_ptr(order, k);
    depth[k] = tr_distance_to_root(cur);
    k++;
  }

  UNPROTECT(1);
  return result;
}
/* Given the 5' and 3' splice sites extracted from a group, check that
   each 5' site and the 3' site immediately following it (after sorting
   by strand direction) form a valid pair in every sequence of the
   alignment.  For each invalid pair, a BAD_INTRON problem is added for
   both features.  Returns 1 if all pairs are valid (or there are fewer
   than two sites), 0 otherwise.  Note that intron_splice is sorted in
   place. */
int are_introns_okay(List *intron_splice,  MSA *msa, List *problems,
                     int offset5, int offset3) {
  char *valid_pairs[3] = {"GTAG", "GCAG", "ATAC"};
  char donor[3], acceptor[3], pair[5];
  char strand;
  int idx, seq, pos5, pos3;
  int all_ok = 1;

  donor[2] = '\0';
  acceptor[2] = '\0';

  if (lst_size(intron_splice) < 2) return 1;

  /* all features are assumed to be on the same strand */
  strand = ((GFF_Feature*)lst_get_ptr(intron_splice, 0))->strand;

  if (strand != '+')
    lst_qsort(intron_splice, feature_comparator_descending); 
  else
    lst_qsort(intron_splice, feature_comparator_ascending); 

  for (idx = 0; idx < lst_size(intron_splice) - 1; idx++) {
    GFF_Feature *site5 = lst_get_ptr(intron_splice, idx);
    GFF_Feature *site3 = lst_get_ptr(intron_splice, idx+1);

    /* only a 5' site directly followed by a 3' site is treated as a
       pair */
    if (!str_starts_with_charstr(site5->feature, SPLICE_5) ||
        !str_starts_with_charstr(site3->feature, SPLICE_3))
      continue;

    pos5 = site5->start - 1 + (strand == '-' ? offset5 : 0);
    pos3 = site3->start - 1 + (strand == '+' ? offset3 : 0);

    for (seq = 0; seq < msa->nseqs; seq++) {
      /* two bases at each site, read from the sufficient statistics */
      donor[0] = ss_get_char_tuple(msa, msa->ss->tuple_idx[pos5], seq, 0);
      donor[1] = ss_get_char_tuple(msa, msa->ss->tuple_idx[pos5+1], seq, 0);
      acceptor[0] = ss_get_char_tuple(msa, msa->ss->tuple_idx[pos3], seq, 0);
      acceptor[1] = ss_get_char_tuple(msa, msa->ss->tuple_idx[pos3+1], seq, 0);

      if (strand == '-') {
        msa_reverse_compl_seq(donor, 2);
        msa_reverse_compl_seq(acceptor, 2);
      }

      /* concatenate into the four-letter signal to be tested */
      strcpy(pair, donor);
      strcat(pair, acceptor);

      if (!is_signal(pair, 3, valid_pairs, msa->is_missing)) {
        problem_add(problems, site5, BAD_INTRON, -1, -1);
        problem_add(problems, site3, BAD_INTRON, -1, -1);
        all_ok = 0;
        break;                  /* one failing sequence is enough */
      }
    }

    idx++;                      /* the 3' site has been consumed */
  }

  return all_ok;
}
Exemple #15
0
/* Return an R character vector with the feature-type string of every
   feature in the GFF_Set behind the external pointer gffP. */
SEXP rph_gff_features(SEXP gffP) {
  GFF_Set *set = (GFF_Set*)EXTPTR_PTR(gffP);
  SEXP names;
  int k;

  names = allocVector(STRSXP, lst_size(set->features));
  PROTECT(names);

  for (k = 0; k < lst_size(set->features); k++) {
    GFF_Feature *feat;
    checkInterruptN(k, 1000);
    feat = (GFF_Feature*)lst_get_ptr(set->features, k);
    SET_STRING_ELT(names, k, mkChar(feat->feature->chars));
  }

  UNPROTECT(1);
  return names;
}
Exemple #16
0
/* Return an R numeric vector with the score of every feature in the
   GFF_Set behind the external pointer gffP. */
SEXP rph_gff_scores(SEXP gffP) {
  GFF_Set *set = (GFF_Set*)EXTPTR_PTR(gffP);
  SEXP scores;
  int k;

  scores = allocVector(REALSXP, lst_size(set->features));
  PROTECT(scores);

  for (k = 0; k < lst_size(set->features); k++) {
    GFF_Feature *feat;
    checkInterruptN(k, 1000);
    feat = (GFF_Feature*)lst_get_ptr(set->features, k);
    REAL(scores)[k] = feat->score;
  }

  UNPROTECT(1);
  return scores;
}
Exemple #17
0
/* Return the category names (as given by cm_get_feature) for a list of
   category numbers.  Each category range contributes at most one entry:
   once a range has been seen, further numbers from it are skipped. */
List *cm_get_features(CategoryMap *cm, List *catnos) {
  int seen[cm->ncats+1];
  List *names = lst_new_ptr(lst_size(catnos));
  int k;

  for (k = 0; k <= cm->ncats; k++)
    seen[k] = 0;

  for (k = 0; k < lst_size(catnos); k++) {
    int catno = lst_get_int(catnos, k);

    /* ranges are tracked by their first category number */
    if (seen[cm->ranges[catno]->start_cat_no])
      continue;

    lst_push_ptr(names, cm_get_feature(cm, catno));
    seen[cm->ranges[catno]->start_cat_no] = 1;
  }

  return names;
}
Exemple #18
0
/* Return an R character vector with the name of every node of the tree
   built from treeP, in tr_preorder order.  Nodes whose name is empty or
   equal to ";" are reported as NA. */
SEXP rph_tree_summary_nodenames(SEXP treeP) {
  TreeNode *tree = rph_tree_new(treeP);
  List *order = tr_preorder(tree);
  SEXP result = PROTECT(NEW_CHARACTER(lst_size(order)));
  int k;

  for (k = 0; k < lst_size(order); k++) {
    TreeNode *cur = (TreeNode*)lst_get_ptr(order, k);
    int unnamed = (strlen(cur->name) == 0 || strcmp(cur->name, ";") == 0);

    if (!unnamed)
      SET_STRING_ELT(result, k, mkChar(cur->name));
    else
      SET_STRING_ELT(result, k, NA_STRING);
  }

  UNPROTECT(1);
  return result;
}
Exemple #19
0
/* Return an R numeric vector with the dparent value of every node of
   the tree built from treeP, in tr_preorder order.  A node without a
   parent is reported as -1. */
SEXP rph_tree_summary_len(SEXP treeP) {
  TreeNode *tree = rph_tree_new(treeP);
  List *order = tr_preorder(tree);
  SEXP result = PROTECT(NEW_NUMERIC(lst_size(order)));
  double *len = NUMERIC_POINTER(result);
  int k;

  for (k = 0; k < lst_size(order); k++) {
    TreeNode *cur = (TreeNode*)lst_get_ptr(order, k);
    if (cur->parent != NULL)
      len[k] = cur->dparent;
    else
      len[k] = -1;
  }

  UNPROTECT(1);
  return result;
}
Exemple #20
0
/*
  Open the file named out_root.name.maf, or return its handle if it is
  already open.  This is a bit messy because in some cases (splitting by
  feature) there may be more output files than the OS can handle, and it
  would be computationally expensive to work out which files are
  finished, even assuming that the MAF is sorted.

  So, if opening a new file fails, the function goes through the list of
  file handles, finds an open one, closes it, and tries again.  This is
  repeated until the open succeeds.

  A file that was opened before and has since been closed is reopened in
  append mode, using the same close-one-and-retry strategy.  If no open
  file is left to close, the program reports an error and dies.

  Finally, close_outfiles below makes sure that all files are closed
  with mafBlock_close_outfile in the end, so that they get the #eof
  closer.
 */
FILE *get_outfile(List *outfileList, Hashtable *outfileHash, String *name, char *out_root,
                  int argc, char *argv[]) {
  /* room for out_root, ".", name, ".maf" and the terminator */
  char *path = smalloc((strlen(out_root)+name->length+7)*sizeof(char));
  FILE *fh;
  int slot, victim;

  sprintf(path, "%s.%s.maf", out_root, name->chars);
  slot = ptr_to_int(hsh_get(outfileHash, path));

  if (slot == -1) {
    /* first request for this file: it will occupy the next list slot */
    hsh_put(outfileHash, path, int_to_ptr(lst_size(outfileList)));
    fh = mafBlock_open_outfile(path, argc, argv);
    while (fh == NULL) {  //too many files are open, close one first
      victim = 0;
      while (victim < lst_size(outfileList)) {
        fh = (FILE*)lst_get_ptr(outfileList, victim);
        if (fh != NULL) break;
        victim++;
      }
      if (victim == lst_size(outfileList)) {
        die("ERROR: too many files open in maf_parse\n");
      } else {
        phast_fclose(fh);
        lst_set_ptr(outfileList, victim, NULL);
      }
      fh = mafBlock_open_outfile(path, argc, argv);
    }
    lst_push_ptr(outfileList, (void*)fh);
    sfree(path);
    return fh;
  }

  /* the file is known; a NULL entry means it was opened and then
     closed again */
  fh = (FILE*)lst_get_ptr(outfileList, slot);
  if (fh == NULL) {
    fh = phast_fopen_no_exit(path, "a");
    while (fh == NULL) {
      victim = 0;
      while (victim < lst_size(outfileList)) {
        fh = (FILE*)lst_get_ptr(outfileList, victim);
        if (fh != NULL) break;
        victim++;
      }
      if (victim == lst_size(outfileList)) {
        die("ERROR: too many files open in maf_parse\n");
      } else {
        phast_fclose(fh);
        lst_set_ptr(outfileList, victim, NULL);
      }
      fh = phast_fopen_no_exit(path, "a");
    }
    lst_set_ptr(outfileList, slot, (void*)fh);
  }

  sfree(path);
  return fh;
}
Exemple #21
0
/* Return an R integer vector giving, for every node of the tree built
   from treeP, the 1-based preorder position of its right child, or -1
   if the node has no right child.  Entries are stored at the node's own
   preorder position.  Dies if a node id is negative or exceeds the node
   count. */
SEXP rph_tree_summary_rchild(SEXP treeP) {
  TreeNode *tree = rph_tree_new(treeP);
  TreeNode *cur;
  List *order = tr_preorder(tree);
  int count = lst_size(order);
  SEXP result = PROTECT(NEW_INTEGER(count));
  int *out = INTEGER_POINTER(result);
  int *pos_of_id = smalloc((count+1)*sizeof(int));
  int k;

  /* first record where each node id sits in the preorder list */
  for (k = 0; k < count; k++) {
    cur = (TreeNode*)lst_get_ptr(order, k);
    if (cur->id > count || cur->id < 0)
      die("invalid id (%i) in tree node\n", cur->id);
    pos_of_id[(int)cur->id] = k;
  }

  /* then translate each right-child id into a 1-based position */
  for (k = 0; k < count; k++) {
    cur = (TreeNode*)lst_get_ptr(order, k);
    if (cur->rchild != NULL)
      out[pos_of_id[cur->id]] = pos_of_id[cur->rchild->id] + 1;
    else
      out[pos_of_id[cur->id]] = -1;
  }

  UNPROTECT(1);
  return result;
}
Exemple #22
0
/* Return the start field of a sub-block.  With specName == NULL the
   first sub-block is used; otherwise the sub-block index is looked up
   in block->specMap.  Returns -1 if the lookup yields -1 or the index is
   not below the number of sub-blocks. */
long mafBlock_get_start(MafBlock *block, String *specName) {
  int pos = 0;

  if (specName != NULL) 
    pos = hsh_get_int(block->specMap, specName->chars);

  if (pos != -1 && pos < lst_size(block->data))
    return ((MafSubBlock*)lst_get_ptr(block->data, pos))->start;

  return -1;
}
/* Score the base at position `base` of seqData under a set of Markov
   matrices and return the natural log of the looked-up probability.

   MarkovMatrices holds one matrix per order (element k is used when k
   previous bases are available), so the highest usable order is
   lst_size(MarkovMatrices)-1; the list must therefore be non-empty.
   The order is lowered when fewer previous bases exist before `base`,
   or when an unknown base (basetocol < 0) is found among them.

   If the row or the column cannot be determined (unknown base), the
   result is log(0) when conservative == 1 and 0 (i.e. log(1))
   otherwise. */
double calcMMscore(char *seqData, int base, List *MarkovMatrices, int conservative) {
  int i, baseAsNum, j, col;
  double val;
  int mmOrder = lst_size(MarkovMatrices)-1;
  Matrix *mm;
  /* keep at least one element: a zero-length variable-length array is
     undefined behavior */
  int previousMMbases[mmOrder > 0 ? mmOrder : 1];
    
  //If there aren't mmOrder previous bases @ base, then adjust mmOrder to take advantage of however many we have
  if (base < mmOrder)
    mmOrder = base;
      
  //If we run into any unknown "N" characters, adjust the mmOrder accordingly.
  //Lowering mmOrder restarts the fill at index 0 for the remaining bases.
  for(i=mmOrder; i>0; i--)
    {
      baseAsNum = basetocol(seqData[base-i]);
      if (baseAsNum < 0)
        mmOrder = i-1;
      else
        previousMMbases[mmOrder-i] = baseAsNum;
    }
   	
  //Get score from Markov Matrix
  mm =  lst_get_ptr(MarkovMatrices, mmOrder);
  j = basesToRow(previousMMbases, mmOrder, mm->ncols);
  col = basetocol(seqData[base]);
  //Both indices must be valid; an unknown current base gives col < 0
  if (j >= 0 && col >= 0)
    val = log(mat_get(mm, j, col));
  else
    {
      if (conservative == 1)
        val = log(0);	//If it is an unknown base, probability is 0, in log space = -inf
      else
        val = 0; //If it is an unknown base probability is 1, in log space log(1)=0
    }
  return val;
}
/* Read substitution scores from file F and return them as a kind of
   pseudo substitution matrix with the same dimensions as the rate matrix
   of mod.  All nonspecified elements of the matrix are equal to
   NEGINFTY, which is to be interpreted as "NA".

   Blank lines and lines starting with '#' are skipped.  Every other
   line must have at least three whitespace-separated columns: two
   tuples and a numeric score; the function dies otherwise.  The caller
   owns the returned matrix. */
Matrix* read_subst_scores(TreeModel *mod, FILE *F) {
  Matrix *retval = mat_new(mod->rate_matrix->size,
                                        mod->rate_matrix->size);
  String *line = str_new(STR_MED_LEN), *tuple1, *tuple2;
  List *l = lst_new_ptr(3);
  int alph_size = (int)strlen(mod->rate_matrix->states);
  int *inv_alph = mod->rate_matrix->inv_states;
  int i;
  double val;
  mat_set_all(retval, NEGINFTY);
  while (str_readline(line, F) != EOF) {
    str_double_trim(line);
    if (str_starts_with_charstr(line, "#") || line->length == 0) 
      continue;
    str_split(line, NULL, l);
    if (lst_size(l) < 3) {
      die("ERROR: wrong number of columns in subst. score file.\n");
    }
    tuple1 = lst_get_ptr(l, 0);
    tuple2 = lst_get_ptr(l, 1);
    if (str_as_dbl(lst_get_ptr(l, 2), &val) != 0) {
      die("ERROR: bad value in subst. score file.\n");
    }
    mat_set(retval, tuple_index(tuple1->chars, inv_alph, alph_size),
                   tuple_index(tuple2->chars, inv_alph, alph_size), val);
    /* release every token produced by str_split, including any columns
       beyond the third, which would otherwise leak */
    for (i = 0; i < lst_size(l); i++)
      str_free(lst_get_ptr(l, i));
  }
  lst_free(l);
  str_free(line);
  return retval;
}
/* Write one machine-readable log entry (via write_machine_problem) for
   each problem recorded against a discarded feature group. */
void write_machine_log(FILE *mlogf, GFF_FeatureGroup *group, List *problems,
                       msa_coord_map *map) {
  int k = 0;
  while (k < lst_size(problems)) {
    write_machine_problem(mlogf, group, lst_get_ptr(problems, k), map);
    k++;
  }
}
Exemple #26
0
/* Determine the branches with respect to which a feature is
   uninformative, i.e. those for which all leaves beneath them have only
   missing data.  The (preallocated) array has_data is filled so that
   has_data[i] tells whether the branch above node i is informative, and
   *nspec is set to the number of leaves that have data.  The root entry
   is always FALSE. */
void ff_find_missing_branches(TreeModel *mod, MSA *msa, GFF_Feature *feat,
                              int *has_data, int *nspec) {
    List *order = tr_postorder(mod->tree);
    int idx, col;

    *nspec = 0;
    for (idx = 0; idx < lst_size(order); idx++) {
        TreeNode *node = lst_get_ptr(order, idx);

        /* exactly one missing child is not allowed */
        if ((node->lchild == NULL) != (node->rchild == NULL))
            die("ERROR ff_find_missing_branches: lchild and rchild should both be NULL or not NULL\n");

        if (node->parent == NULL) {
            /* root */
            has_data[node->id] = FALSE;
        }
        else if (node->lchild == NULL) {
            /* leaf: FALSE unless some column of the feature holds a
               character known to the rate matrix */
            has_data[node->id] = FALSE;
            for (col = feat->start-1; col < feat->end; col++) {
                char c = ss_get_char_tuple(msa, msa->ss->tuple_idx[col],
                                           mod->msa_seq_idx[node->id], 0);
                if (mod->rate_matrix->inv_states[(int)c] >= 0) {
                    has_data[node->id] = TRUE;
                    (*nspec)++;
                    break;
                }
            }
        }
        else {
            /* non-root ancestral node: informative if either child is;
               postorder guarantees the children were handled first */
            if (!has_data[node->lchild->id] && !has_data[node->rchild->id])
                has_data[node->id] = FALSE;
            else
                has_data[node->id] = TRUE;
        }
    }
}
/* Empty a problem list: every stored problem is released with
   problem_free and the list itself is then cleared. */
void problems_clear(List *problems) {
  int k = 0;
  while (k < lst_size(problems)) {
    problem_free(lst_get_ptr(problems, k));
    k++;
  }
  lst_clear(problems);
}
/* Return 1 if the type of feat equals (ignoring case) any of the
   Strings in list l, and 0 otherwise. */
int is_exon(GFF_Feature *feat, List *l) {
  int pos = 0;
  int found = 0;

  while (!found && pos < lst_size(l)) {
    String *candidate = (String*)lst_get_ptr(l, pos);
    if (str_equals_nocase(feat->feature, candidate))
      found = 1;
    pos++;
  }
  return found;
}
Exemple #29
0
//Builds a keep flag per sub-block of the block and hands the flags to
//mafBlock_remove_lines.
//if include==0, all sub-blocks are flagged except the species in the list.
//otherwise, only the species in the list are flagged.
//Names in specNameList that are not found in block->specMap are ignored.
void mafBlock_subSpec(MafBlock *block, List *specNameList, int include) {
  int nsub = lst_size(block->data);
  int *keep = smalloc(nsub*sizeof(int));
  int unlisted_flag = (include==0);   //flag for species not in the list
  int listed_flag = !unlisted_flag;   //flag for species in the list
  int k;

  for (k=0; k<nsub; k++)
    keep[k] = unlisted_flag;

  for (k=0; k<lst_size(specNameList); k++) {
    String *spec = (String*)lst_get_ptr(specNameList, k);
    int pos = hsh_get_int(block->specMap, spec->chars);
    if (pos == -1) continue;
    keep[pos] = listed_flag;
  }

  mafBlock_remove_lines(block, keep);
  sfree(keep);
}
Exemple #30
0
/** Maps a sequence (array) of category numbers from the spooled space to
   the unspooled space, using the current unspooler.  The original
   sequence is overwritten in place.  Does nothing if the category map
   has no unspooler.

   Every element of path must be a valid spooled state, i.e. lie in
   [0, nstates_spooled - 1]; the function dies otherwise, and also dies
   if a state cannot be mapped. */
void cm_spooled_to_unspooled(CategoryMap *cm, int *path, int pathlen) {
  int j, sp_state, prev_sp_state;
  List *pred;

  if (cm->unspooler == NULL) return;

  pred = lst_new_int(cm->unspooler->nstates_spooled);
  prev_sp_state = -1;
  for (j = 0; j < pathlen; j++) {
    /* the unspooler holds nstates_spooled entries, so the largest valid
       state is nstates_spooled - 1 */
    if (!(path[j] >= 0 && path[j] < cm->unspooler->nstates_spooled))
      die("ERROR cm_spooled_to_unspooled: path[%i]=%i, should be in [0, %i]\n",
	  j, path[j], cm->unspooler->nstates_spooled - 1);

    sp_state = path[j];
    path[j] = cm_get_unspooled_state(cm, path[j], pred);

    if (path[j] == -1) 
      die("ERROR: failure mapping to uspooled state at position %d.\n", j);

    /* only a change of spooled state extends the predecessor list */
    if (sp_state != prev_sp_state) {
      /* if the current (spooled) state is not conditioned on any
         other state, then its predecessor cannot matter, so the list
         can be cleared */
      if (lst_size(cm->unspooler->spooled_to_unspooled[sp_state]->children) == 0)
        lst_clear(pred);

      lst_push_int(pred, sp_state);
    }

    prev_sp_state = sp_state;
  }

  lst_free(pred);
}