/*
 * Entry point of the faidx command.
 *
 *   - no arguments beyond the program name: print usage, return 1
 *   - one argument (a FASTA path): call fai_build() on it, return 0
 *   - more arguments: load the index with fai_load() and, for every region
 *     argument, print a ">region" header followed by the sequence returned by
 *     fai_fetch(), wrapped at 60 characters per output line
 *
 * Returns 1 when usage is printed or fai_load() fails, 0 otherwise.
 */
int faidx_main(int argc, char *argv[])
{
    int arg, row_start, col, row_width, seq_len;
    char *seq;
    faidx_t *index;

    if (argc == 1) {
        fprintf(stderr, "Usage: faidx <in.fasta> [<reg> [...]]\n");
        return 1;
    }

    if (argc == 2) {
        /* Index-only mode; the result of fai_build() is not inspected. */
        fai_build(argv[1]);
        return 0;
    }

    index = fai_load(argv[1]);
    if (index == 0)
        return 1;

    for (arg = 2; arg != argc; ++arg) {
        printf(">%s\n", argv[arg]);
        seq = fai_fetch(index, argv[arg], &seq_len);

        for (row_start = 0; row_start < seq_len; row_start += 60) {
            /* The final row may hold fewer than 60 characters. */
            row_width = seq_len - row_start;
            if (row_width > 60)
                row_width = 60;
            for (col = 0; col < row_width; ++col)
                putchar(seq[row_start + col]);
            putchar('\n');
        }
        free(seq);
    }

    fai_destroy(index);
    return 0;
}
faidx_t *fai_load(const char *fn) { char *str; FILE *fp; faidx_t *fai; str = (char*)calloc(strlen(fn) + 5, 1); sprintf(str, "%s.fai", fn); #ifdef _USE_KNETFILE if (strstr(fn, "ftp://") == fn || strstr(fn, "http://") == fn) { fp = download_and_open(str); if ( !fp ) { fprintf(stderr, "[fai_load] failed to open remote FASTA index %s\n", str); free(str); return 0; } } else #endif fp = fopen(str, "rb"); if (fp == 0) { fprintf(stderr, "[fai_load] build FASTA index.\n"); fai_build(fn); fp = fopen(str, "rb"); if (fp == 0) { fprintf(stderr, "[fai_load] fail to open FASTA index.\n"); free(str); return 0; } } fai = fai_read(fp); fclose(fp); fai->bgzf = bgzf_open(fn, "rb"); free(str); if (fai->bgzf == 0) { fprintf(stderr, "[fai_load] fail to open FASTA file.\n"); return 0; } if ( fai->bgzf->is_compressed==1 ) { if ( bgzf_index_load(fai->bgzf, fn, ".gzi") < 0 ) { fprintf(stderr, "[fai_load] failed to load .gzi index: %s[.gzi]\n", fn); fai_destroy(fai); return NULL; } } return fai; }
void ReadDB::build(const std::string& input_reads_filename) { // generate output filename m_indexed_reads_filename = input_reads_filename + GZIPPED_READS_SUFFIX; // Populate database with read names and convert the fastq // input into fasta for faidx import_reads(input_reads_filename, m_indexed_reads_filename); // build faidx int ret = fai_build(m_indexed_reads_filename.c_str()); if(ret != 0) { fprintf(stderr, "Error running faidx_build on %s\n", m_indexed_reads_filename.c_str()); exit(EXIT_FAILURE); } m_fai = NULL; }
faidx_t *fai_load(const char *fn) { char *str; FILE *fp; faidx_t *fai; str = (char*)calloc(strlen(fn) + 5, 1); sprintf(str, "%s.fai", fn); #ifdef _USE_KNETFILE if (strstr(fn, "ftp://") == fn || strstr(fn, "http://") == fn) { fp = download_and_open(str); if ( !fp ) { fprintf(stderr, "[fai_load] failed to open remote FASTA index %s\n", str); free(str); return 0; } } else #endif fp = fopen(str, "rb"); if (fp == 0) { fprintf(stderr, "[fai_load] build FASTA index.\n"); fai_build(fn); fp = fopen(str, "rb"); if (fp == 0) { fprintf(stderr, "[fai_load] fail to open FASTA index.\n"); free(str); return 0; } } fai = fai_read(fp); fclose(fp); fai->rz = razf_open(fn, "rb"); free(str); if (fai->rz == 0) { fprintf(stderr, "[fai_load] fail to open FASTA file.\n"); return 0; } return fai; }
char *samfaipath(const char *fn_ref) { char *fn_list = 0; if (fn_ref == 0) return 0; fn_list = calloc(strlen(fn_ref) + 5, 1); strcat(strcpy(fn_list, fn_ref), ".fai"); if (access(fn_list, R_OK) == -1) { // fn_list is unreadable if (access(fn_ref, R_OK) == -1) { fprintf(stderr, "[samfaipath] fail to read file %s.\n", fn_ref); } else { if (bam_verbose >= 3) fprintf(stderr, "[samfaipath] build FASTA index...\n"); if (fai_build(fn_ref) == -1) { fprintf(stderr, "[samfaipath] fail to build FASTA index.\n"); free(fn_list); fn_list = 0; } } } return fn_list; }
/*
 * Check that indexing `filename` yields the expected number of sequences.
 *
 * The input is copied line by line to "<filename>.tmp" while counting lines
 * that start with '>' and lines that consist of exactly "+\n". If no '>'
 * lines were seen but "+" lines were, the "+" count is used as the expected
 * number of records (the input is probably FASTQ). The copy is then indexed
 * with fai_build(), loaded with fai_load(), and the counts reported by
 * faidx_fetch_nseq() and faidx_nseq() are compared with the expectation.
 *
 * Every problem is reported through fail(). Because fail() may return, each
 * unrecoverable error is followed by an explicit return so that no NULL
 * stream or missing index is used afterwards.
 */
static void faidx1(const char *filename)
{
    int n, n_exp = 0, n_fq_exp = 0, name_len;
    char tmpfilename[FILENAME_MAX], line[500];
    FILE *fin, *fout;
    faidx_t *fai;

    fin = fopen(filename, "rb");
    if (fin == NULL) {
        fail("can't open %s\n", filename);
        return;
    }

    /* Bounded formatting: a long input path must not overflow the buffer. */
    name_len = snprintf(tmpfilename, sizeof tmpfilename, "%s.tmp", filename);
    if (name_len < 0 || (size_t) name_len >= sizeof tmpfilename) {
        fail("temporary file name for %s is too long", filename);
        fclose(fin);
        return;
    }

    fout = fopen(tmpfilename, "wb");
    if (fout == NULL) {
        fail("can't create temporary %s\n", tmpfilename);
        fclose(fin);
        return;
    }

    while (fgets(line, sizeof line, fin)) {
        if (line[0] == '>') n_exp++;
        if (line[0] == '+' && line[1] == '\n') n_fq_exp++;
        fputs(line, fout);
    }
    fclose(fin);
    fclose(fout);

    if (n_exp == 0 && n_fq_exp != 0) {
        /* probably a fastq file */
        n_exp = n_fq_exp;
    }

    if (fai_build(tmpfilename) < 0) {
        fail("can't index %s", tmpfilename);
        return;
    }

    fai = fai_load(tmpfilename);
    if (fai == NULL) {
        fail("can't load faidx file %s", tmpfilename);
        return;
    }

    n = faidx_fetch_nseq(fai);
    if (n != n_exp)
        fail("%s: faidx_fetch_nseq returned %d, expected %d", filename, n, n_exp);

    n = faidx_nseq(fai);
    if (n != n_exp)
        fail("%s: faidx_nseq returned %d, expected %d", filename, n, n_exp);

    fai_destroy(fai);
}
explicit MMappedFastaFile(std::string const & _filename) : filename(_filename) { struct stat st; stat(_filename.c_str(), &st); filesize = (size_t) st.st_size; fd = open(_filename.c_str(), O_RDONLY, 0); assert(fd != -1); #if __APPLE__ base = (uint8_t *) mmap(NULL, filesize, PROT_READ, MAP_PRIVATE, fd, 0); #else base = (uint8_t *) mmap(NULL, filesize, PROT_READ, MAP_PRIVATE | MAP_POPULATE, fd, 0); #endif if(base == MAP_FAILED) { const int err = errno; error("Cannot mmap %s (errno=%i / %s) -- do you have enough memory available?", _filename.c_str(), err, strerror(err)); } auto fai_fp = fopen((filename + ".fai").c_str(), "r"); if(!fai_fp) { assert(fai_build(filename.c_str()) == 0); } else { fclose(fai_fp); } std::ifstream index_in(filename + ".fai"); while(index_in.good()) { std::string line; std::getline(index_in, line); std::vector<std::string> parts; stringutil::split(line, parts, "\n\t", false); if(parts.size() == 5) { const std::string contig = parts[0]; index_entry ientry; sscanf(parts[1].c_str(), "%zu", &ientry.length); sscanf(parts[2].c_str(), "%zu", &ientry.start_offset); sscanf(parts[3].c_str(), "%zu", &ientry.chars_per_line); sscanf(parts[4].c_str(), "%zu", &ientry.bytes_per_line); fai[contig] = ientry; // determine non-N length; we don't do this because it's pretty slow. 
// size_t offset = ientry.start_offset; // const size_t line_number = (ientry.length - 1) / ientry.chars_per_line; // size_t offset_in_line = (ientry.length - 1) % ientry.chars_per_line; // size_t offset_end = ientry.start_offset + line_number*ientry.bytes_per_line + offset_in_line + 1; // size_t ns = 0; // // while(offset < offset_end) // { // if(std::tolower(base[offset - 1]) == 'n') // { // ++ns; // } // ++offset; // } // fai[contig].non_n_length = fai[contig].length - ns; // length trimming off N's at start and end size_t pos = 0; size_t ns_at_start = 0; bool done = false; // start of contig while(pos < ientry.length && !done) { const std::string s = get(contig, pos, 10000); for(size_t j = 0; j < s.size(); ++j) { if(std::tolower(s.at(j)) == 'n') { ++ns_at_start; } else { done = true; break; } } pos += s.size(); } size_t ns_at_end = 0; // check if we had all Ns if(ns_at_start < ientry.length) { const size_t line_number = (ientry.length - 1) / ientry.chars_per_line; size_t offset_in_line = (ientry.length - 1) % ientry.chars_per_line; size_t offset = ientry.start_offset + line_number*ientry.bytes_per_line + offset_in_line + 1; // end of contig while(offset > ientry.start_offset) { if (base[offset - 1] == '\n' || base[offset - 1] == '\r') { // skip newlines } else if(std::tolower(base[offset - 1]) == 'n') { ++ns_at_end; } else { break; } --offset; } } fai[contig].non_n_length = fai[contig].length - (ns_at_start + ns_at_end); } else if(parts.size() > 0) { error("invalid fai line %s in %s", line.c_str(), (filename + ".fai").c_str()); } } }