/*
 * Scan the local pack directory and register an index for each pack file.
 *
 * The directory scanned is "<path>/objects/pack/", where `path` is a
 * variable defined outside this function.  Every entry whose name has the
 * form "pack-<40 hex digits>.pack" (50 characters in total) has its hex id
 * decoded and passed to setup_index().
 *
 * Returns 0 once the directory has been scanned, or -1 when the directory
 * name does not fit in PATH_MAX or the directory cannot be opened.
 */
static int setup_indices(void)
{
	DIR *dir;
	struct dirent *de;
	char filename[PATH_MAX];
	unsigned char sha1[20];
	int written;

	/* Bounded formatting: a long `path` must not overrun filename[]. */
	written = snprintf(filename, sizeof(filename), "%s/objects/pack/", path);
	if (written < 0 || (size_t)written >= sizeof(filename))
		return -1;

	dir = opendir(filename);
	if (!dir)
		return -1;

	while ((de = readdir(dir)) != NULL) {
		size_t namelen = strlen(de->d_name);

		/* "pack-" (5) + 40 hex digits + ".pack" (5) == 50 characters. */
		if (namelen != 50)
			continue;
		if (strncmp(de->d_name, "pack-", 5) != 0)
			continue;
		if (strcmp(de->d_name + namelen - 5, ".pack") != 0)
			continue;

		/*
		 * Skip names whose middle part is not valid hex so that
		 * setup_index() never sees a partially filled sha1 buffer.
		 * NOTE(review): assumes get_sha1_hex() returns non-zero on
		 * malformed input -- confirm against its declaration.
		 */
		if (get_sha1_hex(de->d_name + 5, sha1))
			continue;

		setup_index(sha1);
	}

	closedir(dir);
	return 0;
}
static int fetch_indices(struct walker *walker, struct alt_base *repo) { unsigned char sha1[20]; char *url; struct strbuf buffer = STRBUF_INIT; char *data; int i = 0; int ret = 0; struct active_request_slot *slot; struct slot_results results; if (repo->got_indices) return 0; if (walker->get_verbosely) fprintf(stderr, "Getting pack list for %s\n", repo->base); url = xmalloc(strlen(repo->base) + 21); sprintf(url, "%s/objects/info/packs", repo->base); slot = get_active_slot(); slot->results = &results; curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer); curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer); curl_easy_setopt(slot->curl, CURLOPT_URL, url); curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL); if (start_active_slot(slot)) { run_active_slot(slot); if (results.curl_result != CURLE_OK) { if (missing_target(&results)) { repo->got_indices = 1; goto cleanup; } else { repo->got_indices = 0; ret = error("%s", curl_errorstr); goto cleanup; } } } else { repo->got_indices = 0; ret = error("Unable to start request"); goto cleanup; } data = buffer.buf; while (i < buffer.len) { switch (data[i]) { case 'P': i++; if (i + 52 <= buffer.len && !prefixcmp(data + i, " pack-") && !prefixcmp(data + i + 46, ".pack\n")) { get_sha1_hex(data + i + 6, sha1); setup_index(walker, repo, sha1); i += 51; break; } default: while (i < buffer.len && data[i] != '\n') i++; } i++; } repo->got_indices = 1; cleanup: strbuf_release(&buffer); free(url); return ret; }
/*
 * read_markov_model
 *
 * Build the conditional-probability array of a background model, either
 * from a vector of letter frequencies (freq) or from a file of
 * "tuple probability" lines (pfile).  When freq is non-NULL it takes
 * precedence and pfile is not read.
 *
 * Parameters:
 *   pfile  name of the probability file; lines starting with '#' are
 *          skipped, every other line must split into exactly two fields
 *   freq   letter frequencies, or NULL to read pfile
 *   alpha  alphabet expected; if add_x is set, a copy with 'X' appended
 *          is allocated and used instead (the copy is not released here)
 *   add_x  add x-tuples if TRUE
 *   rc     average reverse-complement probabilities if TRUE
 *   order  receives the order of the model: 0 on the freq path (skipped
 *          if order is NULL), longest tuple width minus one on the file path
 *
 * Returns the conditional-probability array: on the freq path it is
 * allocated here with Resize, on the file path it is the result of
 * get_cond_prob().
 *
 * Any input error (no source given, unreadable file, malformed line,
 * probability outside [0,1], illegal character, model larger than
 * MAX_BACK_SIZE, missing tuple) prints a message to stderr and calls
 * exit(1).
 */
extern double *read_markov_model(
  char *pfile,					/* name of probability file */
  double *freq,					/* letter frequencies */
  char *alpha,					/* alphabet expected */
  BOOLEAN add_x,				/* add x-tuples if TRUE */
  BOOLEAN rc,					/* average reverse complements */
  int *order					/* order of model read */
)
{
  int i;					/* index into array */
  double a_p[MAX_BACK_SIZE];			/* tuple-prob array */
  double *a_cp=NULL;				/* conditional prob. array */
  FILE *pfilep;					/* file pointer to file */
  char *line=NULL;				/* line buffer */
  char **fields=NULL;				/* fields of line */
  int nfields;					/* number of fields in line */
  int line_no=0;				/* line number */
  char *tuple;					/* the tuple */
  double p;					/* the probability */
  int maxw=0;					/* maximum tuple width */
  int alen=strlen(alpha);			/* length of alphabet */
  int ntuples;					/* number of tuples */

  /* check input */
  if (!pfile && !freq) {
    fprintf(stderr, "read_markov_model error: specify pfile or freq\n");
    exit(1);
  }

  /* add 'X' to the alphabet if requested */
  if (add_x) {
    char *tmp = NULL;
    Resize(tmp, alen+2, char);
    strcpy(tmp, alpha);
    tmp[alen] = 'X';
    tmp[alen+1] = '\0';
    alpha = tmp;
    alen++;
  }

  /* setup the mapping from ascii to integer and back */
  setup_index(alpha);

  /* use the frequencies if given */
  if (freq) {					/* frequencies given */
    Resize(a_cp, alen, double);
    /* copy the caller's letters only; the appended 'X' (if any) is set below */
    for (i=0; i<alen-add_x; i++) RND(freq[i], 8, a_cp[i]);
    if (add_x) a_cp[i] = 1.0;			/* Pr(X) */
    /* average reverse complement probabilities together if requested */
    if (rc) average_rc(add_x, a_cp, 1, "", 0, alpha);
    /*
     * A frequency-only model holds width-1 tuples only, i.e. order 0.
     * Report that instead of leaving the caller's variable unset; the
     * NULL guard keeps callers that passed no order pointer working.
     */
    if (order) *order = 0;
    return(a_cp);
  }

  /* initialize probability array; -1 marks "no probability given" */
  for (i=0; i<MAX_BACK_SIZE; i++) a_p[i] = -1;

  /* read in the probabilities and save indexed by uppercase tuple name */
  if (!(pfilep = fopen(pfile, "r"))) {
    fprintf(stderr, "Unable to open file %s for reading.\n", pfile);
    exit(1);
  }

  while (1) {					/* read file */
    int len, index;

    line_no++;
    Getline(pfilep, line, len);			/* read next line */
    if (!line) break;				/* at EOF */
    if (line[0] == '#') continue;		/* skip comments */

    Split(line, fields, nfields);		/* get tuple and prob */
    if (nfields != 2) {
      fprintf(stderr, "Formatting error in file %s line %d: %s\n",
        pfile, line_no, line);
      exit(1);
    }
    tuple = fields[0];
    p = atof(fields[1]);
    if (p<0 || p>1) {
      fprintf(stderr, "Illegal probability in file %s line %d: %s\n",
        pfile, line_no, line);
      /* fatal like every other input error: do not store a bad value */
      exit(1);
    }

    len = strlen(tuple);
    maxw = MAX(len, maxw);
    index = s2i(tuple);
    if (index < 0) {
      fprintf(stderr, "Illegal character in word `%s' in file %s line %d: %s\n",
        tuple, pfile, line_no, line);
      exit(1);
    }
    if (index >= MAX_BACK_SIZE) {
      /* tell the user how large the table would have to be */
      for (i=1, ntuples=0; i<=maxw; i++) ntuples+= pow(alen, i);
      fprintf(stderr, "Background model too large. Use smaller model or increase \n"
        "MAX_BACK_SIZE to at least %d in background.h and recompile.\n", ntuples);
      exit(1);
    }
    a_p[index] = p;				/* store probability */
  }
  fclose(pfilep);

  /* check that all necessary probabilities are defined */
  tuple = check_prob(add_x, a_p, maxw, "", 0, alpha);
  if (tuple) {
    fprintf(stderr, "File %s gives no probability for %s.\n", pfile, tuple);
    exit(1);
  }
  *order = maxw - 1;				/* order of Markov model */

  /* average reverse complement probabilities together if requested */
  if (rc) average_rc(add_x, a_p, maxw, "", 0, alpha);

  /* get conditional probabilities */
  for (i=1, ntuples=0; i<=maxw; i++) ntuples+= pow(alen, i);
  a_cp = get_cond_prob(a_p, ntuples);

  /* print the probabilities */
#ifdef DEBUG
  print_prob(a_cp, maxw, "", 0, alpha);
#endif

  return(a_cp);					/* return conditionals */
} /* read_markov_model */
extern double *get_markov_from_sequence( char *seq, // the raw ASCII sequence const char *alpha, // alphabet expected BOOLEAN rc, // average reverse complements if TRUE int order, // order of Markov model to create double epsilon // pseudocount ) { int i, w; /* setup the mapping from ascii to integer and back */ setup_index(alpha); /* initialize probability array */ int alen=strlen(alpha); /* length of alphabet */ int ntuples = 0; // size of array int maxw = order + 1; for (i=1, ntuples=0; i<=maxw; i++) ntuples+= pow(alen, i); double *a_p = NULL; Resize(a_p, ntuples, double); /* tuple-prob array */ for (i=0; i<ntuples; i++) a_p[i] = epsilon; // set counts to epsilon // initialize the total_counts array int *total_count = NULL; Resize(total_count, maxw+1, int); for (w=1; w<=maxw; w++) total_count[w] = 0; // // Scan the sequence and count tuples of sizes 1 to order+1. // int seqlen = strlen(seq); for (w=1; w<=maxw; w++) { char *tuple; for (i=0, tuple=seq; i<=seqlen-w; i++, tuple++) { char save = tuple[w]; // create the tuple in-line tuple[w] = '\0'; // by putting a null after tuple int index = s2i(tuple); // hash the tuple // skip tuples containing ambiguous characters if (index >= 0) { a_p[index]++; // update count of tuple total_count[w]++; // update count for width w } tuple[w] = save; // remove null } // sequence position } // w // // Convert counts to probabilities // int index = 0; // array index for (w=1; w<=maxw; w++) { int j; // tuple index for (j=0; j<pow(alen, w); j++, index++) { a_p[index] /= (total_count[w] + ((seqlen-w+1)*epsilon)); } // j } // w /* average reverse complement probabilities together if requested */ if (rc) average_rc(1, a_p, maxw, "", 0, alpha); //print_prob(a_p, maxw, "", 0, alpha); /* get conditional probabilities */ double *a_cp = get_cond_prob(a_p, ntuples); //print_prob(a_cp, maxw, "", 0, alpha); return(a_cp); } // get_markov_from_sequence