Beispiel #1
0
/*
 * Scan "<path>/objects/pack/" and call setup_index() for every directory
 * entry named "pack-<40 hex digits>.pack".
 *
 * Returns 0 once the directory has been scanned, or -1 when the directory
 * name does not fit into PATH_MAX bytes or the directory cannot be opened.
 */
static int setup_indices(void)
{
	DIR *dir;
	struct dirent *de;
	char filename[PATH_MAX];
	unsigned char sha1[20];
	int len;

	/* Bounded formatting: "path" is set outside this function. */
	len = snprintf(filename, sizeof(filename), "%s/objects/pack/", path);
	if (len < 0 || (size_t)len >= sizeof(filename))
		return -1;

	dir = opendir(filename);
	if (!dir)
		return -1;

	while ((de = readdir(dir)) != NULL) {
		size_t namelen = strlen(de->d_name);

		/* "pack-" (5) + 40 hex digits + ".pack" (5) == 50 characters */
		if (namelen != 50 ||
		    strncmp(de->d_name, "pack-", 5) ||
		    strcmp(de->d_name + namelen - 5, ".pack"))
			continue;

		/*
		 * Do not hand a partially filled sha1 to setup_index().
		 * NOTE(review): relies on get_sha1_hex() returning 0 on
		 * success (git's convention) - confirm against its definition.
		 */
		if (get_sha1_hex(de->d_name + 5, sha1))
			continue;

		setup_index(sha1);
	}
	closedir(dir);
	return 0;
}
Beispiel #2
0
static int fetch_indices(struct walker *walker, struct alt_base *repo)
{
	unsigned char sha1[20];
	char *url;
	struct strbuf buffer = STRBUF_INIT;
	char *data;
	int i = 0;
	int ret = 0;

	struct active_request_slot *slot;
	struct slot_results results;

	if (repo->got_indices)
		return 0;

	if (walker->get_verbosely)
		fprintf(stderr, "Getting pack list for %s\n", repo->base);

	url = xmalloc(strlen(repo->base) + 21);
	sprintf(url, "%s/objects/info/packs", repo->base);

	slot = get_active_slot();
	slot->results = &results;
	curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
	curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
	curl_easy_setopt(slot->curl, CURLOPT_URL, url);
	curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
	if (start_active_slot(slot)) {
		run_active_slot(slot);
		if (results.curl_result != CURLE_OK) {
			if (missing_target(&results)) {
				repo->got_indices = 1;
				goto cleanup;
			} else {
				repo->got_indices = 0;
				ret = error("%s", curl_errorstr);
				goto cleanup;
			}
		}
	} else {
		repo->got_indices = 0;
		ret = error("Unable to start request");
		goto cleanup;
	}

	data = buffer.buf;
	while (i < buffer.len) {
		switch (data[i]) {
		case 'P':
			i++;
			if (i + 52 <= buffer.len &&
			    !prefixcmp(data + i, " pack-") &&
			    !prefixcmp(data + i + 46, ".pack\n")) {
				get_sha1_hex(data + i + 6, sha1);
				setup_index(walker, repo, sha1);
				i += 51;
				break;
			}
		default:
			while (i < buffer.len && data[i] != '\n')
				i++;
		}
		i++;
	}

	repo->got_indices = 1;
cleanup:
	strbuf_release(&buffer);
	free(url);
	return ret;
}
Beispiel #3
0
/*
 * read_markov_model
 *
 * Build a background model either from the letter frequencies in freq or,
 * when freq is NULL, from the "tuple probability" lines of the file pfile.
 *
 * With freq given, the result has one entry per letter of the (possibly
 * X-extended) alphabet and *order, if order is non-NULL, is set to 0.
 * Otherwise the file is read, lines starting with '#' are skipped, the
 * probabilities are checked for completeness with check_prob() and the
 * array returned by get_cond_prob() is the result; *order is set to the
 * longest tuple width minus one.
 *
 * Any input error is reported on stderr and terminates the process with
 * exit(1).
 */
extern double *read_markov_model( 
  char *pfile, 					/* name of probability file */
  double *freq,					/* letter frequencies */
  char *alpha,					/* alphabet expected */
  BOOLEAN add_x,				/* add x-tuples if TRUE */
  BOOLEAN rc,					/* average reverse complements*/
  int *order					/* order of model read */
) 
{
  int i;					/* index into array */
  double a_p[MAX_BACK_SIZE];			/* tuple-prob array */
  double *a_cp=NULL; 				/* conditional prob. array */
  FILE *pfilep;					/* file pointer to file */
  char *line=NULL;				/* line buffer */
  char **fields=NULL;				/* fields of line */
  int nfields;					/* number of fields in line */
  int line_no=0;				/* line number */
  char *tuple;					/* the tuple */
  double p;					/* the probability */
  int maxw=0;					/* maximum tuple width */
  int alen=strlen(alpha);			/* length of alphabet */
  int ntuples;					/* number of tuples */

  /* check input */
  if (!pfile && !freq) {
    fprintf(stderr, "read_markov_model error: specify pfile or freq\n");
    exit(1);
  }

  /* add 'X' to the alphabet if requested */
  if (add_x) {
    char *tmp = NULL;
    Resize(tmp, alen+2, char);
    strcpy(tmp, alpha);
    tmp[alen] = 'X'; tmp[alen+1] = '\0';
    /* NOTE(review): tmp is never released here; whether setup_index()
       keeps the pointer is not visible from this function. */
    alpha = tmp;
    alen++; 
  }

  /* setup the mapping from ascii to integer and back */
  setup_index(alpha);

  /* use the frequencies if given */
  if (freq) {					/* frequencies given */
    Resize(a_cp, alen, double);
    for (i=0; i<alen-add_x; i++) { RND(freq[i], 8, a_cp[i]); }
    if (add_x) a_cp[i] = 1.0;			/* Pr(X) */
    /* average reverse complement probabilities together if requested */
    if (rc) average_rc(add_x, a_cp, 1, "", 0, alpha); 
    /* Tuples have width 1 here, so by the maxw-1 rule used for files the
       order is 0; previously *order was left untouched on this path. */
    if (order) *order = 0;
    return(a_cp);
  }

  /* initialize probability array; -1 marks "no value read" */
  for (i=0; i<MAX_BACK_SIZE; i++) a_p[i] = -1;

  /* read in the probabilities and save indexed by uppercase tuple name */
  if (!(pfilep = fopen(pfile, "r"))) {
    fprintf(stderr, "Unable to open file %s for reading.\n", pfile);
    exit(1);
  }

  while (1) {					/* read file */
    int len, index;
    line_no++;
    Getline(pfilep, line, len);			/* read next line */
    if (!line) break;				/* at EOF */
    if (line[0] == '#') continue;		/* skip comments */
    Split(line, fields, nfields);		/* get tuple and prob */
    if (nfields != 2) {
      fprintf(stderr, 
        "Formatting error in file %s line %d: %s\n", pfile, line_no, line);
      exit(1);
    }
    tuple = fields[0];
    p = atof(fields[1]);
    if (p<0 || p>1) {
      fprintf(stderr, "Illegal probability in file %s line %d: %s\n", 
        pfile, line_no, line);
      /* Fatal like every other input error in this loop; the value was
         previously stored and used despite being reported as illegal. */
      exit(1);
    }
    len = strlen(tuple);
    maxw = MAX(len, maxw);
    index = s2i(tuple);
    if (index < 0) {
      fprintf(stderr, "Illegal character in word `%s' in file %s line %d: %s\n",
        tuple, pfile, line_no, line);
      exit(1);
    }
    if (index >= MAX_BACK_SIZE) {
      /* tell the user how large the array would have to be */
      for (i=1, ntuples=0; i<=maxw; i++) ntuples+= pow(alen, i);
      fprintf(stderr, "Background model too large.  Use smaller model or increase \nMAX_BACK_SIZE to at least %d in background.h and recompile.\n", ntuples);
      exit(1);
    }
    a_p[index] = p;				/* store probability */
  }
  fclose(pfilep);

  /* check that all necessary probabilities are defined;
     a non-NULL result names a tuple without a probability */
  tuple = check_prob(add_x, a_p, maxw, "", 0, alpha); 
  if (tuple) { 
    fprintf(stderr, "File %s gives no probability for %s.\n", pfile, 
      tuple);
    exit(1);
  }

  *order = maxw - 1;				/* order of Markov model */

  /* average reverse complement probabilities together if requested */
  if (rc) average_rc(add_x, a_p, maxw, "", 0, alpha); 

  /* get conditional probabilities for all tuples of width 1..maxw */
  for (i=1, ntuples=0; i<=maxw; i++) ntuples+= pow(alen, i);
  a_cp = get_cond_prob(a_p, ntuples);

  /* print the probabilities */
#ifdef DEBUG
  print_prob(a_cp, maxw, "", 0, alpha);
#endif

  return(a_cp);					/* return conditionals */
} /* read_markov_model */
Beispiel #4
0
//
// get_markov_from_sequence
//
// Count every tuple of width 1..order+1 in seq (each count starts at
// epsilon), turn the counts into probabilities per width, optionally
// average reverse complements, and return the array produced by
// get_cond_prob().
//
// seq is modified temporarily while scanning (a NUL is written after each
// tuple and then removed again), so it must be writable; it is restored
// before the function returns.
//
extern double *get_markov_from_sequence(
  char *seq,            // the raw ASCII sequence
  const char *alpha,    // alphabet expected
  BOOLEAN rc,           // average reverse complements if TRUE
  int order,            // order of Markov model to create
  double epsilon        // pseudocount
)
{
  int i, w;
  
  /* setup the mapping from ascii to integer and back */
  setup_index(alpha);

  /* initialize probability array */
  int alen=strlen(alpha);                       /* length of alphabet */
  int ntuples = 0;				// size of array
  int maxw = order + 1;
  for (i=1, ntuples=0; i<=maxw; i++) ntuples+= pow(alen, i);
  double *a_p = NULL;
  Resize(a_p, ntuples, double);			/* tuple-prob array */
  for (i=0; i<ntuples; i++) a_p[i] = epsilon;	// set counts to epsilon

  // initialize the total_counts array (slot 0 is unused)
  int *total_count = NULL;
  Resize(total_count, maxw+1, int);
  for (w=1; w<=maxw; w++) total_count[w] = 0;

  //
  // Scan the sequence and count tuples of sizes 1 to order+1.
  //
  int seqlen = strlen(seq);
  for (w=1; w<=maxw; w++) {
    char *tuple;
    for (i=0, tuple=seq; i<=seqlen-w; i++, tuple++) {
      char save = tuple[w];			// create the tuple in-line
      tuple[w] = '\0';				// by putting a null after tuple
      int index = s2i(tuple);			// hash the tuple
      // a negative index is skipped (tuple not representable)
      if (index >= 0) {
        a_p[index]++;				// update count of tuple
        total_count[w]++;			// update count for width w
      }
      tuple[w] = save;				// remove null
    } // sequence position
  } // w

  //
  // Convert counts to probabilities.  The entries for width w occupy the
  // next pow(alen, w) consecutive slots of a_p.
  //
  int index = 0;				// array index
  for (w=1; w<=maxw; w++) {
    int j;					// tuple index
    // Both values are constant for a given w; compute them once.
    double n_w = pow(alen, w);
    // NOTE(review): pseudocount mass is (seqlen-w+1)*epsilon although each
    // of the pow(alen, w) cells was seeded with epsilon - confirm intended.
    double denom = total_count[w] + ((seqlen-w+1)*epsilon);
    for (j=0; j<n_w; j++, index++) {
      a_p[index] /= denom;
    } // j
  } // w

  // The per-width totals are not needed any more; they were leaked before.
  // NOTE(review): assumes Resize() allocates via malloc/realloc so free()
  // is the matching release - confirm against the macro definition.
  free(total_count);

  /* average reverse complement probabilities together if requested */
  if (rc) average_rc(1, a_p, maxw, "", 0, alpha); 

  /* get conditional probabilities */
  double *a_cp = get_cond_prob(a_p, ntuples);

  // Release the joint-probability array unless get_cond_prob() handed the
  // very same buffer back.
  if (a_cp != a_p) free(a_p);

  return(a_cp);

} // get_markov_from_sequence