Example #1
0
/*
 * Entry point of the `faidx` command.
 *
 *   faidx <in.fasta>               build the index for <in.fasta>
 *   faidx <in.fasta> <reg> [...]   print each requested region as FASTA,
 *                                  wrapped at 60 characters per line
 *
 * Returns 0 on success and 1 on a usage error, when the index cannot be
 * loaded, or when building the index fails.
 */
int faidx_main(int argc, char *argv[])
{
	if (argc == 1) {
		fprintf(stderr, "Usage: faidx <in.fasta> [<reg> [...]]\n");
		return 1;
	} else {
		if (argc == 2) {
			/* Propagate an indexing failure instead of silently
			 * reporting success to the caller. */
			if (fai_build(argv[1]) != 0) return 1;
		} else {
			int i, j, k, l;
			char *s;
			faidx_t *fai;
			fai = fai_load(argv[1]);
			if (fai == 0) return 1;
			for (i = 2; i != argc; ++i) {
				l = 0;
				printf(">%s\n", argv[i]);
				s = fai_fetch(fai, argv[i], &l);
				/* No buffer means nothing to print; do not rely on
				 * the value left in l to keep the loop from
				 * touching a null pointer. */
				if (s == 0) continue;
				for (j = 0; j < l; j += 60) {
					/* Emit at most 60 bases, fewer on the last line. */
					for (k = 0; k < 60 && k < l - j; ++k)
						putchar(s[j + k]);
					putchar('\n');
				}
				free(s);
			}
			fai_destroy(fai);
		}
	}
	return 0;
}
Example #2
0
/*
 * Load the FASTA index for `fn` (stored next to it as "<fn>.fai") and open
 * the FASTA file itself through BGZF.
 *
 * If the index file cannot be opened locally, fai_build() is invoked to
 * create it and the open is retried. When _USE_KNETFILE is defined and `fn`
 * starts with "ftp://" or "http://", the index is fetched with
 * download_and_open() instead and no rebuild is attempted.
 *
 * For a compressed FASTA the accompanying ".gzi" index is loaded as well.
 *
 * Returns the loaded index, or NULL (0) on any failure. The caller releases
 * a successful result with fai_destroy().
 */
faidx_t *fai_load(const char *fn)
{
    char *str;
    FILE *fp;
    faidx_t *fai;
    /* Room for fn + ".fai" + terminating NUL. */
    str = (char*)calloc(strlen(fn) + 5, 1);
    if (str == 0) {
        fprintf(stderr, "[fai_load] out of memory.\n");
        return 0;
    }
    sprintf(str, "%s.fai", fn);

#ifdef _USE_KNETFILE
    if (strstr(fn, "ftp://") == fn || strstr(fn, "http://") == fn)
    {
        fp = download_and_open(str);
        if ( !fp )
        {
            fprintf(stderr, "[fai_load] failed to open remote FASTA index %s\n", str);
            free(str);
            return 0;
        }
    }
    else
#endif
        fp = fopen(str, "rb");
    if (fp == 0) {
        /* No index yet: try to create it, then open it again. */
        fprintf(stderr, "[fai_load] build FASTA index.\n");
        fai_build(fn);
        fp = fopen(str, "rb");
        if (fp == 0) {
            fprintf(stderr, "[fai_load] fail to open FASTA index.\n");
            free(str);
            return 0;
        }
    }

    fai = fai_read(fp);
    fclose(fp);
    free(str);
    if (fai == 0) {
        /* Guard the dereferences below against a failed read. */
        fprintf(stderr, "[fai_load] fail to read FASTA index.\n");
        return 0;
    }

    fai->bgzf = bgzf_open(fn, "rb");
    if (fai->bgzf == 0) {
        fprintf(stderr, "[fai_load] fail to open FASTA file.\n");
        /* Release the parsed index instead of leaking it.
         * NOTE(review): relies on fai_destroy() tolerating a NULL bgzf
         * handle -- confirm against its definition. */
        fai_destroy(fai);
        return 0;
    }
    if ( fai->bgzf->is_compressed==1 )
    {
        if ( bgzf_index_load(fai->bgzf, fn, ".gzi") < 0 )
        {
            fprintf(stderr, "[fai_load] failed to load .gzi index: %s[.gzi]\n", fn);
            fai_destroy(fai);
            return NULL;
        }
    }
    return fai;
}
Example #3
0
// Build the read database for `input_reads_filename`.
//
// The indexed copy of the reads is written to the input name with
// GZIPPED_READS_SUFFIX appended; import_reads() fills that file and the
// read-name database, after which the file is indexed with fai_build().
// The process exits with EXIT_FAILURE if indexing reports an error.
void ReadDB::build(const std::string& input_reads_filename)
{
    // Derive the name of the indexed output from the input name.
    m_indexed_reads_filename = input_reads_filename + GZIPPED_READS_SUFFIX;

    // Register read names and convert the fastq input to fasta for faidx.
    import_reads(input_reads_filename, m_indexed_reads_filename);

    // Index the converted reads; any non-zero status is fatal.
    const char* indexed_path = m_indexed_reads_filename.c_str();
    if(fai_build(indexed_path) != 0) {
        fprintf(stderr, "Error running faidx_build on %s\n", indexed_path);
        exit(EXIT_FAILURE);
    }

    // No index handle is held once building is complete.
    m_fai = NULL;
}
Example #4
0
/*
 * Load the FASTA index for `fn` (stored next to it as "<fn>.fai") and open
 * the FASTA file itself through RAZF.
 *
 * If the index file cannot be opened locally, fai_build() is invoked to
 * create it and the open is retried. When _USE_KNETFILE is defined and `fn`
 * starts with "ftp://" or "http://", the index is fetched with
 * download_and_open() instead and no rebuild is attempted.
 *
 * Returns the loaded index, or 0 on any failure. The caller releases a
 * successful result with fai_destroy().
 */
faidx_t *fai_load(const char *fn)
{
	char *str;
	FILE *fp;
	faidx_t *fai;
	/* Room for fn + ".fai" + terminating NUL. */
	str = (char*)calloc(strlen(fn) + 5, 1);
	if (str == 0) {
		fprintf(stderr, "[fai_load] out of memory.\n");
		return 0;
	}
	sprintf(str, "%s.fai", fn);

#ifdef _USE_KNETFILE
    if (strstr(fn, "ftp://") == fn || strstr(fn, "http://") == fn)
    {
        fp = download_and_open(str);
        if ( !fp )
        {
            fprintf(stderr, "[fai_load] failed to open remote FASTA index %s\n", str);
            free(str);
            return 0;
        }
    }
    else
#endif
        fp = fopen(str, "rb");
	if (fp == 0) {
		/* No index yet: try to create it, then open it again. */
		fprintf(stderr, "[fai_load] build FASTA index.\n");
		fai_build(fn);
		fp = fopen(str, "rb");
		if (fp == 0) {
			fprintf(stderr, "[fai_load] fail to open FASTA index.\n");
			free(str);
			return 0;
		}
	}

	fai = fai_read(fp);
	fclose(fp);
	free(str);
	if (fai == 0) {
		/* Guard the dereferences below against a failed read. */
		fprintf(stderr, "[fai_load] fail to read FASTA index.\n");
		return 0;
	}

	fai->rz = razf_open(fn, "rb");
	if (fai->rz == 0) {
		fprintf(stderr, "[fai_load] fail to open FASTA file.\n");
		/* Release the parsed index instead of leaking it.
		 * NOTE(review): relies on fai_destroy() tolerating a NULL rz
		 * handle -- confirm against its definition. */
		fai_destroy(fai);
		return 0;
	}
	return fai;
}
/*
 * Return a newly allocated "<fn_ref>.fai" path for the reference `fn_ref`,
 * building the index first when it is not readable.
 *
 * Returns 0 when `fn_ref` is 0 or when building the index fails. When the
 * reference itself is unreadable a message is printed, but the path is
 * still returned. The caller owns the returned string and must free() it.
 */
char *samfaipath(const char *fn_ref)
{
	char *fai_path;

	if (fn_ref == 0) return 0;

	/* Room for fn_ref + ".fai" + terminating NUL. */
	fai_path = (char*)calloc(strlen(fn_ref) + 5, 1);
	strcpy(fai_path, fn_ref);
	strcat(fai_path, ".fai");

	/* Index already present and readable: nothing more to do. */
	if (access(fai_path, R_OK) != -1) return fai_path;

	if (access(fn_ref, R_OK) == -1) {
		/* Reference is unreadable too; report it and hand back the path. */
		fprintf(stderr, "[samfaipath] fail to read file %s.\n", fn_ref);
		return fai_path;
	}

	if (bam_verbose >= 3) fprintf(stderr, "[samfaipath] build FASTA index...\n");
	if (fai_build(fn_ref) == -1) {
		fprintf(stderr, "[samfaipath] fail to build FASTA index.\n");
		free(fai_path);
		return 0;
	}
	return fai_path;
}
Example #6
0
File: sam.c Project: atks/vt
/*
 * Test helper: copy `filename` to "<filename>.tmp", index the copy, and
 * check that the number of sequences reported by the index matches the
 * number of records counted while copying.
 *
 * Records are counted as lines starting with '>' (FASTA); if there are
 * none, lines consisting of a lone '+' are used instead (FASTQ).
 * Problems are reported through fail().
 *
 * NOTE(review): fail() is assumed to record the failure and return (the
 * fai_load() check already returns explicitly after calling it), so every
 * error path that cannot continue returns here as well.
 */
static void faidx1(const char *filename)
{
    int n, n_exp = 0, n_fq_exp = 0, len;
    char tmpfilename[FILENAME_MAX], line[500];
    FILE *fin, *fout;
    faidx_t *fai;

    fin = fopen(filename, "rb");
    if (fin == NULL) {
        fail("can't open %s\n", filename);
        return; /* nothing to read from */
    }

    /* Bounded formatting: a long input name must not overrun the buffer. */
    len = snprintf(tmpfilename, sizeof tmpfilename, "%s.tmp", filename);
    if (len < 0 || (size_t) len >= sizeof tmpfilename) {
        fail("temporary filename too long for %s\n", filename);
        fclose(fin);
        return;
    }

    fout = fopen(tmpfilename, "wb");
    if (fout == NULL) {
        fail("can't create temporary %s\n", tmpfilename);
        fclose(fin); /* do not leak the input handle */
        return;
    }

    /* Copy the input while counting the records it contains. */
    while (fgets(line, sizeof line, fin)) {
        if (line[0] == '>') n_exp++;
        if (line[0] == '+' && line[1] == '\n') n_fq_exp++;
        fputs(line, fout);
    }
    fclose(fin);
    fclose(fout);

    if (n_exp == 0 && n_fq_exp != 0) {
        // probably a fastq file
        n_exp = n_fq_exp;
    }

    if (fai_build(tmpfilename) < 0) fail("can't index %s", tmpfilename);
    fai = fai_load(tmpfilename);
    if (fai == NULL) { fail("can't load faidx file %s", tmpfilename); return; }

    n = faidx_fetch_nseq(fai);
    if (n != n_exp)
        fail("%s: faidx_fetch_nseq returned %d, expected %d", filename, n, n_exp);

    n = faidx_nseq(fai);
    if (n != n_exp)
        fail("%s: faidx_nseq returned %d, expected %d", filename, n, n_exp);

    fai_destroy(fai);
}
Example #7
0
    explicit MMappedFastaFile(std::string const & _filename) :
        filename(_filename)
    {
        struct stat st;
        stat(_filename.c_str(), &st);
        filesize = (size_t) st.st_size;
        fd = open(_filename.c_str(), O_RDONLY, 0);
        assert(fd != -1);
#if __APPLE__
        base = (uint8_t *) mmap(NULL, filesize, PROT_READ, MAP_PRIVATE, fd, 0);
#else
        base = (uint8_t *) mmap(NULL, filesize, PROT_READ, MAP_PRIVATE | MAP_POPULATE, fd, 0);
#endif
        if(base == MAP_FAILED)
        {
            const int err = errno;
            error("Cannot mmap %s (errno=%i / %s) -- do you have enough memory available?",
                  _filename.c_str(), err, strerror(err));
        }

        auto fai_fp = fopen((filename + ".fai").c_str(), "r");
        if(!fai_fp)
        {
            assert(fai_build(filename.c_str()) == 0);
        }
        else
        {
            fclose(fai_fp);
        }

        std::ifstream index_in(filename + ".fai");
        while(index_in.good())
        {
            std::string line;
            std::getline(index_in, line);
            std::vector<std::string> parts;
            stringutil::split(line, parts, "\n\t", false);
            if(parts.size() == 5)
            {
                const std::string contig = parts[0];
                index_entry ientry;
                sscanf(parts[1].c_str(), "%zu", &ientry.length);
                sscanf(parts[2].c_str(), "%zu", &ientry.start_offset);
                sscanf(parts[3].c_str(), "%zu", &ientry.chars_per_line);
                sscanf(parts[4].c_str(), "%zu", &ientry.bytes_per_line);
                fai[contig] = ientry;

                // determine non-N length; we don't do this because it's pretty slow.
//                size_t offset = ientry.start_offset;
//                const size_t line_number = (ientry.length - 1) / ientry.chars_per_line;
//                size_t offset_in_line = (ientry.length - 1) % ientry.chars_per_line;
//                size_t offset_end = ientry.start_offset + line_number*ientry.bytes_per_line + offset_in_line + 1;
//                size_t ns = 0;
//
//                while(offset < offset_end)
//                {
//                    if(std::tolower(base[offset - 1]) == 'n')
//                    {
//                        ++ns;
//                    }
//                    ++offset;
//                }
//                fai[contig].non_n_length = fai[contig].length - ns;

                // length trimming off N's at start and end
                size_t pos = 0;
                size_t ns_at_start = 0;
                bool done = false;

                // start of contig
                while(pos < ientry.length && !done)
                {
                    const std::string s = get(contig, pos, 10000);

                    for(size_t j = 0; j < s.size(); ++j)
                    {
                        if(std::tolower(s.at(j)) == 'n')
                        {
                            ++ns_at_start;
                        }
                        else
                        {
                            done = true;
                            break;
                        }
                    }

                    pos += s.size();
                }

                size_t ns_at_end = 0;
                // check if we had all Ns
                if(ns_at_start < ientry.length)
                {
                    const size_t line_number = (ientry.length - 1) / ientry.chars_per_line;
                    size_t offset_in_line = (ientry.length - 1) % ientry.chars_per_line;
                    size_t offset = ientry.start_offset + line_number*ientry.bytes_per_line + offset_in_line + 1;

                    // end of contig
                    while(offset > ientry.start_offset)
                    {
                        if (base[offset - 1] == '\n' || base[offset - 1] == '\r')
                        {
                            // skip newlines
                        }
                        else if(std::tolower(base[offset - 1]) == 'n')
                        {
                            ++ns_at_end;
                        }
                        else
                        {
                            break;
                        }
                        --offset;
                    }
                }
                fai[contig].non_n_length = fai[contig].length - (ns_at_start + ns_at_end);
            }
            else if(parts.size() > 0)
            {
                error("invalid fai line %s in %s", line.c_str(), (filename + ".fai").c_str());
            }
        }
    }