示例#1
0
/* Print a CategoryMap to a file.

   Output, in order:
     - an "NCATS = <n>" header line;
     - for each category range starting at category 1, one line per
       feature type: the type name, the range's first category number,
       "-<last>" when the range covers more than one category, and, if
       conditioned_on[] has a list for the category, a tab followed by
       that list, comma-separated;
     - a "LABELLING_PRECEDENCE = ..." line and a "FILL_PRECEDENCE = ..."
       line listing, in the order produced by sorting with compare_prec,
       every category whose precedence is not -1.

   Side effect: the variable 'prec' (declared outside this function,
   presumably read by compare_prec) is pointed at each precedence array
   in turn before the corresponding sort. */
void cm_print(CategoryMap *cm, FILE *F) {
  int i, j, k;
  int nprinted;                 /* entries written so far on the
                                   current precedence line */
  List *tmpl;
  fprintf(F, "NCATS = %d\n\n", cm->ncats);

  for (i = 1; i <= cm->ncats; i++) {
    CategoryRange *cr = cm->ranges[i];
    for (j = 0; j < lst_size(cr->feature_types); j++) {
      String *s = (String*)lst_get_ptr(cr->feature_types, j);
      fprintf(F, "%-15s %d", s->chars, cr->start_cat_no);
      if (cr->end_cat_no > cr->start_cat_no)
        fprintf(F, "-%d", cr->end_cat_no);
      if (cm->conditioned_on[i] != NULL) {
        fprintf(F, "\t");
        for (k = 0; k < lst_size(cm->conditioned_on[i]); k++)
          fprintf(F, "%d%s", lst_get_int(cm->conditioned_on[i], k),
                  k + 1 == lst_size(cm->conditioned_on[i]) ? "" : ",");
      }
      fprintf(F, "\n");
    }
    i = cr->end_cat_no;         /* avoid looking multiple times at the
                                   same range */
  }

  /* reconstruct precedence lists */
  tmpl = lst_new_int(cm->ncats + 1);
  for (i = 0; i <= cm->ncats; i++)
    lst_push_int(tmpl, i);
  prec = cm->labelling_precedence;
  lst_qsort(tmpl, compare_prec);
  fprintf(F, "\nLABELLING_PRECEDENCE = ");
  nprinted = 0;
  for (i = 0; i <= cm->ncats; i++) {
    int cat = lst_get_int(tmpl, i);
    if (cm->labelling_precedence[cat] == -1)
      continue;                 /* category has no precedence */
    /* The separator is written *before* every entry but the first, so
       skipped categories can never leave a dangling comma at the end
       of the line. */
    fprintf(F, "%s%d", nprinted > 0 ? "," : "", cat);
    nprinted++;
  }
  fprintf(F, "\n");

  lst_clear(tmpl);
  for (i = 0; i <= cm->ncats; i++)
    lst_push_int(tmpl, i);
  prec = cm->fill_precedence;
  lst_qsort(tmpl, compare_prec);
  fprintf(F, "FILL_PRECEDENCE = ");
  nprinted = 0;
  for (i = 0; i <= cm->ncats; i++) {
    int cat = lst_get_int(tmpl, i);
    if (cm->fill_precedence[cat] == -1)
      continue;                 /* category has no precedence */
    fprintf(F, "%s%d", nprinted > 0 ? "," : "", cat);
    nprinted++;
  }
  fprintf(F, "\n");
  lst_free(tmpl);
}
/* Given a list of 5' and 3' splice-site features extracted from a
   group, check whether they form valid pairs in all species.

   The list is sorted in place (ascending for the '+' strand,
   descending otherwise; the strand of the first feature is assumed to
   apply to all).  Every 5' splice feature that is immediately followed
   by a 3' splice feature is treated as one intron; for each sequence
   of the alignment the two bases read at each site are concatenated
   (after reverse-complementing on the '-' strand) and tested with
   is_signal against GTAG, GCAG and ATAC.  On the first sequence that
   fails, both features are added to 'problems' as BAD_INTRON.

   Returns 1 if every pair passed (or the list has fewer than two
   features), 0 otherwise. */
int are_introns_okay(List *intron_splice,  MSA *msa, List *problems,
                     int offset5, int offset3) {
  char * splice_pairs[3] = {"GTAG", "GCAG", "ATAC"};
  char site5[3], site3[3], signal[5];
  int idx, seq, pos5, pos3;
  int all_ok = 1;
  char strand;

  if (lst_size(intron_splice) < 2) return 1;

  /* the two-base buffers stay NUL-terminated for strcpy/strcat */
  site5[2] = '\0';
  site3[2] = '\0';

  /* assume all features are on the same strand as the first */
  strand = ((GFF_Feature*)lst_get_ptr(intron_splice, 0))->strand;

  lst_qsort(intron_splice,
            strand == '+' ? feature_comparator_ascending
                          : feature_comparator_descending);

  for (idx = 0; idx < lst_size(intron_splice) - 1; idx++) {
    GFF_Feature *five = lst_get_ptr(intron_splice, idx);
    GFF_Feature *three = lst_get_ptr(intron_splice, idx + 1);

    /* only a 5' splice directly followed by a 3' splice is a pair */
    if (!str_starts_with_charstr(five->feature, SPLICE_5) ||
        !str_starts_with_charstr(three->feature, SPLICE_3))
      continue;

    /* zero-based alignment columns; the offset applied depends on
       the strand */
    pos5 = five->start - 1;
    if (strand == '-') pos5 += offset5;
    pos3 = three->start - 1;
    if (strand == '+') pos3 += offset3;

    for (seq = 0; seq < msa->nseqs; seq++) {
      site5[0] = ss_get_char_tuple(msa, msa->ss->tuple_idx[pos5], seq, 0);
      site5[1] = ss_get_char_tuple(msa, msa->ss->tuple_idx[pos5+1], seq, 0);
      site3[0] = ss_get_char_tuple(msa, msa->ss->tuple_idx[pos3], seq, 0);
      site3[1] = ss_get_char_tuple(msa, msa->ss->tuple_idx[pos3+1], seq, 0);

      if (strand == '-') {
        msa_reverse_compl_seq(site5, 2);
        msa_reverse_compl_seq(site3, 2);
      }

      strcpy(signal, site5);
      strcat(signal, site3);

      if (is_signal(signal, 3, splice_pairs, msa->is_missing))
        continue;               /* this sequence is fine */

      /* one bad sequence is enough to flag the whole intron */
      problem_add(problems, five, BAD_INTRON, -1, -1);
      problem_add(problems, three, BAD_INTRON, -1, -1);
      all_ok = 0;
      break;
    }

    idx++;                      /* the 3' feature has been consumed */
  }

  return all_ok;
}
/* Scans a cds for gaps in every sequence of the alignment.  Returns
   NGAPS (no gap characters found), CLN_GAPS, NOVRLP_CLN_GAPS, or
   FSHIFT_BAD:

     - FSHIFT_BAD as soon as a run of gaps whose length is not a
       multiple of 3 is found; in that case *problem is set to a new
       FSHIFT problem covering the run;
     - NOVRLP_CLN_GAPS if all runs have lengths that are multiples of
       3, no two distinct runs overlap, and no run lies within 3 sites
       of either end of the feature;
     - CLN_GAPS if all runs are multiples of 3 but one of the other two
       conditions fails (the boundary criterion is deliberately folded
       into the overlap criterion).

   Doesn't try to check for compensatory indels, which is more
   complicated and is left to a separate special-purpose function. */
int scan_for_gaps(GFF_Feature *feat, MSA *msa, Problem **problem) {
  int first = feat->start - 1;  /* zero-based, inclusive */
  int last = feat->end - 1;     /* zero-based, inclusive */
  List *gaps = lst_new_ptr(10);
  cds_gap_type status = NGAPS;
  int touches_boundary = 0;
  int seq, pos, probe, k;

  for (seq = 0; status != FSHIFT_BAD && seq < msa->nseqs; seq++) {
    for (pos = first; pos <= last; pos++) {
      int lo, hi;               /* inclusive extent of the gap run */
      struct gap *run;

      if (ss_get_char_pos(msa, pos, seq, 0) != GAP_CHAR)
        continue;

      /* grow the run leftwards, then rightwards, within the feature */
      lo = pos;
      for (probe = pos - 1;
           probe >= first && ss_get_char_pos(msa, probe, seq, 0) == GAP_CHAR;
           probe--)
        lo = probe;
      hi = pos;
      for (probe = pos + 1;
           probe <= last && ss_get_char_pos(msa, probe, seq, 0) == GAP_CHAR;
           probe++)
        hi = probe;

      if ((hi - lo + 1) % 3 != 0) {
        /* frame-shifting gap: report it and stop scanning altogether */
        *problem = problem_new(feat, FSHIFT, lo, hi);
        (*problem)->cds_gap = FSHIFT_BAD;
        status = FSHIFT_BAD;
        break;
      }

      /* note whether gaps occur near a cds boundary (within 3 sites) */
      if (lo <= first + 3 || hi >= last - 3)
        touches_boundary = 1;

      if (status == NGAPS)
        status = CLN_GAPS;

      run = smalloc(sizeof(*run));
      run->start = lo;
      run->end = hi;
      lst_push_ptr(gaps, run);

      pos = hi;                 /* resume just past this run */
    }
  }

  if (status == CLN_GAPS) {     /* now check for overlaps */
    int overlap = 0;

    lst_qsort(gaps, gap_compare);
    for (k = 1; !overlap && k < lst_size(gaps); k++) {
      struct gap *prev = lst_get_ptr(gaps, k - 1);
      struct gap *cur = lst_get_ptr(gaps, k);
      int identical = (cur->start == prev->start && cur->end == prev->end);

      /* identical runs (same gap seen in several sequences) are not
         counted as overlapping */
      if (cur->start <= prev->end && !identical)
        overlap = 1;
    }

    /* the boundary criterion is being confounded with the overlap
       criterion: a run near a boundary also demotes the result to
       CLN_GAPS */
    if (!overlap && !touches_boundary)
      status = NOVRLP_CLN_GAPS;
  }

  for (k = 0; k < lst_size(gaps); k++)
    sfree(lst_get_ptr(gaps, k));
  lst_free(gaps);

  return status;
}
示例#4
0
/*
 *  list_directory
 *
 *  Build a list of the names of the entries in the directory listDir
 *  that pass the restrictions filter.  "." and ".." and entries that
 *  cannot be lstat'ed are skipped.
 *
 *  If startFile is non-NULL, only directory entries that sort
 *  alphabetically (strcmp) after startFile are returned.  This lets a
 *  caller continue a previous, truncated listing.
 *
 *  If maxentries <= 0 every matching entry is returned and the list is
 *  sorted with node_cmp at the end.  Otherwise the list is kept sorted
 *  (strcmp) while it is built and trimmed to the first maxentries names.
 *
 *  morefiles is incremented for every matching entry that had to be
 *  left out because of maxentries.  This is intended to let the caller
 *  know they can continue reading.
 *
 *  Note that the directory may change while we're reading it.  If it does,
 *  files that have been added or removed since we started reading it may
 *  not be accurately reflected.
 *
 *  Returns 0 on success with the list in *direntries (strings are
 *  allocated with copystr).  Returns non-zero on failure: -1 for missing
 *  arguments or allocation failures, otherwise whatever set_restrict,
 *  samrerr, readdir64_r or the list routines returned.  Once the list has
 *  been created, any failure frees it and sets *direntries to NULL.
 */
int
list_directory(
	ctx_t		*c,			/* ARGSUSED */
	int		maxentries,
	char		*listDir,		/* directory to list */
	char		*startFile,		/* if continuing, start here */
	char		*restrictions,
	uint32_t	*morefiles,		/* OUT */
	sqm_lst_t		**direntries)		/* OUT */
{
	int		rval = 0;
	int		st = 0;
	DIR		*curdir; /* Variable for directory system calls */
	dirent64_t	*entry;	/* Pointer to a directory entry */
	dirent64_t	*entryp;
	struct stat64	sout;
	restrict_t	filter = {0};
	char		*data;	/* Pointer to data item to add to list */
	node_t		*node;
	sqm_lst_t		*lstp = NULL;
	char		buf[MAXPATHLEN + 1];
	char		*fname;

	if (ISNULL(listDir, direntries, morefiles)) {
		return (-1);
	}

	*morefiles = 0;

	/* Set up wildcard restrictions */
	rval = set_restrict(restrictions, &filter);
	if (rval) {
		return (rval);
	}

	curdir = opendir(listDir); /* Set up to ask for directory entries */
	if (curdir == NULL) {
		return (samrerr(SE_NOSUCHPATH, listDir));
	}

	*direntries = lst_create(); /* Return results in this list */
	if (*direntries == NULL) {
		closedir(curdir);
		return (-1);	/* If allocation failed, samerr is set */
	}
	lstp = *direntries;

	/* readdir64_r needs caller-supplied room for the longest name */
	entry = mallocer(sizeof (struct dirent64) + MAXPATHLEN + 1);
	if (entry == NULL) {
		closedir(curdir);
		lst_free(*direntries);
		*direntries = NULL;
		return (-1);
	}

	/* Walk through directory entries */
	while ((rval = readdir64_r(curdir, entry, &entryp)) == 0) {
		if (entryp == NULL) {
			break;	/* end of directory */
		}

		fname = (char *)&(entry->d_name[0]);

		if ((strcmp(fname, ".") == 0) ||
		    (strcmp(fname, "..") == 0)) {
			continue;
		}

		/*
		 * If we were given a starting file, only keep names that
		 * sort after it alphabetically.
		 */
		if (startFile != NULL) {
			if ((strcmp(fname, startFile)) <= 0) {
				continue;
			}
		}

		/* Create full pathname and get stat info */
		snprintf(buf, sizeof (buf), "%s/%s", listDir, fname);
		if (lstat64(buf, &sout) != 0) {
			continue; /* Ignore file which can't be stat'ed */
		}

		/*
		 * think about ways to avoid a double-stat in when we're
		 * fetching file details
		 */
		if (check_restrict_stat(fname, &sout, &filter)) {
			continue; /* Not this entry */
		}

		/* copy to allocated struct */
		data = copystr(fname);
		if (data == NULL) {
			rval = -1;
			break;	/* samerr already set */
		}

		/*
		 * caller wants all entries for the directory
		 * should there be a top-end limit, to avoid the case where
		 * the directory has millions of entries?
		 */
		if (maxentries <= 0) {
			rval = lst_append(lstp, data);
			if (rval != 0) {
				free(data);
				break;
			}
			continue;
		}

		/*
		 * Directory may have more entries than requested, so pre-sort
		 * the list so we return the first <n> sorted alphabetically.
		 */
		for (node = lstp->head; node != NULL; node = node->next) {

			st = strcmp(data, (char *)(node->data));
			if (st > 0) {
				continue;
			}

			if (st < 0) {
				rval = lst_ins_before(lstp, node, data);
				if (rval == 0) {
					/* the list now holds the string */
					data = NULL;
				}
			}

			/*
			 * Still ours here means either a duplicate name
			 * (st == 0) or a failed insert; in both cases the
			 * string must be released.
			 */
			if (data != NULL) {
				free(data);
				data = NULL;
			}
			break;
		}

		/*
		 * A failed insert must abort the walk; otherwise the next
		 * readdir64_r call would overwrite rval and the error would
		 * be lost.
		 */
		if (rval != 0) {
			break;
		}

		/* entry sorts higher than existing entries */
		if (data != NULL) {
			if (lstp->length < maxentries) {
				rval = lst_append(lstp, data);
				if (rval != 0) {
					free(data);
					break;
				}
			} else {
				/* no room for this entry */
				free(data);
				(*morefiles)++;
			}
		}

		/* Keep list to designated limits */
		if (lstp->length > maxentries) {
			/*
			 * pop off the last entry
			 *
			 * NOTE(review): if lst_remove does not free the
			 * node's data, the popped name leaks here -- confirm
			 * against the list implementation.
			 */
			lst_remove(lstp, lstp->tail);
			(*morefiles)++;
		}
	}

	closedir(curdir);
	free(entry);

	if (rval) {
		lst_free_deep(*direntries);
		*direntries = NULL;
	} else if (maxentries <= 0) {
		/* the unlimited path appended in directory order */
		lst_qsort(*direntries, node_cmp);
	}

	return (rval);
}
示例#5
0
/*
 *  list_dir
 *
 *  Collect the names of the entries in the directory filepath that pass
 *  the restrictions filter, then sort the collected names with node_cmp.
 *  "." and ".." and entries that cannot be stat'ed are skipped.
 *
 *  Collection stops as soon as the list holds maxentries names.  Entries
 *  are taken in whatever order readdir64_r yields them, so a truncated
 *  result is not necessarily the first maxentries names in sorted order.
 *
 *  NOTE(review): the limit is tested after each append, so a value of
 *  maxentries <= 0 stops after the first accepted entry rather than
 *  meaning "no limit" as it does in list_directory -- confirm intended.
 *
 *  Returns 0 on success with the list in *direntries (strings are
 *  allocated with copystr).  Returns non-zero on failure; once the list
 *  has been created, any failure frees it and sets *direntries to NULL.
 */
int
list_dir(
	ctx_t *c, int maxentries, char *filepath,
	char *restrictions, sqm_lst_t **direntries) /* ARGSUSED */
{
	int rval = 0;
	DIR *curdir;		/* Variable for directory system calls */
	struct dirent64 *entry;	/* Pointer to a directory entry */
	struct dirent64 *entryp;
	struct stat64 sout;
	restrict_t filter = {0};
	char *data;		/* Pointer to data item to add to list */
	char fullpath[MAXPATHLEN];

	/* Set up wildcard restrictions */
	rval = set_restrict(restrictions, &filter);
	if (rval) {
		return (rval);
	}

	curdir = opendir(filepath); /* Set up to ask for directory entries */
	if (curdir == NULL) {
		return (samrerr(SE_NOSUCHPATH, filepath));
	}

	*direntries = lst_create(); /* Return results in this list */
	if (*direntries == NULL) {
		closedir(curdir);
		return (-1);	/* If allocation failed, samerr is set */
	}

	/* readdir64_r needs caller-supplied room for the longest name */
	entry = mallocer(sizeof (struct dirent64) + MAXPATHLEN + 1);
	if (entry == NULL) {
		closedir(curdir);
		lst_free(*direntries);
		*direntries = NULL;
		return (-1);
	}

	/* Walk through directory entries */
	while ((rval = readdir64_r(curdir, entry, &entryp)) == 0) {
		if (entryp == NULL) {
			break;	/* end of directory */
		}

		if ((strcmp(entry->d_name, ".") == 0) ||
		    (strcmp(entry->d_name, "..") == 0)) {
			continue;
		}
		/* Create full pathname and get stat info */
		snprintf(
		    fullpath, MAXPATHLEN, "%s/%s", filepath,
		    entry->d_name);

		if (stat64(fullpath, &sout) != 0) {
			continue; /* Ignore file which can't be stat'ed */
		}

		if (check_restrict_stat(entry->d_name, &sout, &filter))
			continue; /* Not this entry */

		data = copystr(entry->d_name); /* Copy data to allocated mem */
		if (data == NULL) {
			rval = -1;
			break;	/* samerr already set */
		}

		/*
		 * A failed append must not be ignored: the string would
		 * leak and the caller would get a silently incomplete list.
		 */
		rval = lst_append(*direntries, data);
		if (rval != 0) {
			free(data);
			break;
		}

		if ((*direntries)->length >= maxentries)
			break;	/* Keep list to designated limits */
	}
	free(entry);

	if (rval) {
		lst_free_deep(*direntries); /* On failure, don't return list */
		*direntries = NULL;
	} else {
		lst_qsort(*direntries, node_cmp);
	}
	closedir(curdir);
	return (rval);
}