Пример #1
0
faidx_t *fai_read(FILE *fp)
{
	faidx_t *fai;
	char *buf, *p;
	int len, line_len, line_blen;
#ifdef _WIN32
	long offset;
#else
	long long offset;
#endif
	fai = (faidx_t*)calloc(1, sizeof(faidx_t));
	fai->hash = kh_init(s);
	buf = (char*)calloc(0x10000, 1);
	while (!feof(fp) && fgets(buf, 0x10000, fp)) {
		for (p = buf; *p && isgraph(*p); ++p);
		*p = 0; ++p;
#ifdef _WIN32
		sscanf(p, "%d%ld%d%d", &len, &offset, &line_blen, &line_len);
#else
		sscanf(p, "%d%lld%d%d", &len, &offset, &line_blen, &line_len);
#endif
		fai_insert_index(fai, buf, len, line_len, line_blen, offset);
	}
	free(buf);
	return fai;
}
Пример #2
0
faidx_t *fai_build_core(RAZF *rz)
{
	char c, *name;
	int l_name, m_name, ret;
	int len, line_len, line_blen, state;
	int l1, l2;
	faidx_t *idx;
	uint64_t offset;

	idx = (faidx_t*)calloc(1, sizeof(faidx_t));
	idx->hash = kh_init(s);
	name = 0; l_name = m_name = 0;
	len = line_len = line_blen = -1; state = 0; l1 = l2 = -1; offset = 0;
	while (razf_read(rz, &c, 1)) {
		if (c == '\n') { // an empty line
			if (state == 1) {
				offset = razf_tell(rz);
				continue;
			} else if ((state == 0 && len < 0) || state == 2) continue;
		}
		if (c == '>') { // fasta header
			if (len >= 0)
				fai_insert_index(idx, name, len, line_len, line_blen, offset);
			l_name = 0;
			while ((ret = razf_read(rz, &c, 1)) != 0 && !isspace(c)) {
				if (m_name < l_name + 2) {
					m_name = l_name + 2;
					kroundup32(m_name);
					name = (char*)realloc(name, m_name);
				}
				name[l_name++] = c;
			}
			name[l_name] = '\0';
			if (ret == 0) {
				fprintf(stderr, "[fai_build_core] the last entry has no sequence\n");
				free(name); fai_destroy(idx);
				return 0;
			}
			if (c != '\n') while (razf_read(rz, &c, 1) && c != '\n');
			state = 1; len = 0;
			offset = razf_tell(rz);
		} else {
			if (state == 3) {
				fprintf(stderr, "[fai_build_core] inlined empty line is not allowed in sequence '%s'.\n", name);
				free(name); fai_destroy(idx);
				return 0;
			}
			if (state == 2) state = 3;
			l1 = l2 = 0;
			do {
				++l1;
				if (isgraph(c)) ++l2;
			} while ((ret = razf_read(rz, &c, 1)) && c != '\n');
			if (state == 3 && l2) {
				fprintf(stderr, "[fai_build_core] different line length in sequence '%s'.\n", name);
				free(name); fai_destroy(idx);
				return 0;
			}
			++l1; len += l2;
			if (l2 >= 0x10000) {
				fprintf(stderr, "[fai_build_core] line length exceeds 65535 in sequence '%s'.\n", name);
				free(name); fai_destroy(idx);
				return 0;
			}
			if (state == 1) line_len = l1, line_blen = l2, state = 0;
			else if (state == 0) {
				if (l1 != line_len || l2 != line_blen) state = 2;
			}
		}
	}
	fai_insert_index(idx, name, len, line_len, line_blen, offset);
	free(name);
	return idx;
}
Пример #3
0
faidx_t *fai_build_core(BGZF *bgzf)
{
    char *name;
    int c;
    int l_name, m_name;
    int line_len, line_blen, state;
    int l1, l2;
    faidx_t *idx;
    uint64_t offset;
    int64_t len;

    idx = (faidx_t*)calloc(1, sizeof(faidx_t));
    name = (char*)calloc(1, sizeof(char)); /* at least 1 byte, for '\0' */
    idx->hash = kh_init(s);
    l_name = m_name = 0;
    len = line_len = line_blen = -1; state = 0; l1 = l2 = -1; offset = 0;
    while ( (c=bgzf_getc(bgzf))>=0 ) {
        if (c == '\n') { // an empty line
            if (state == 1) {
                offset = bgzf_utell(bgzf);
                continue;
            } else if ((state == 0 && len < 0) || state == 2) continue;
        }
        if (c == '>') { // fasta header
            if (len >= 0)
                fai_insert_index(idx, name, len, line_len, line_blen, offset);
            l_name = 0;
            while ( (c=bgzf_getc(bgzf))>=0 && !isspace(c)) {
                if (m_name < l_name + 2) {
                    m_name = l_name + 2;
                    kroundup32(m_name);
                    name = (char*)realloc(name, m_name);
                }
                name[l_name++] = c;
            }
            name[l_name] = '\0';
            if ( c<0 ) {
                fprintf(stderr, "[fai_build_core] the last entry has no sequence\n");
                free(name); fai_destroy(idx);
                return 0;
            }
            if (c != '\n') while ( (c=bgzf_getc(bgzf))>=0 && c != '\n');
            state = 1; len = 0;
            offset = bgzf_utell(bgzf);
        } else {
            if (state == 3) {
                fprintf(stderr, "[fai_build_core] inlined empty line is not allowed in sequence '%s'.\n", name);
                free(name); fai_destroy(idx);
                return 0;
            }
            if (state == 2) state = 3;
            l1 = l2 = 0;
            do {
                ++l1;
                if (isgraph(c)) ++l2;
            } while ( (c=bgzf_getc(bgzf))>=0 && c != '\n');
            if (state == 3 && l2) {
                fprintf(stderr, "[fai_build_core] different line length in sequence '%s'.\n", name);
                free(name); fai_destroy(idx);
                return 0;
            }
            ++l1; len += l2;
            if (state == 1) line_len = l1, line_blen = l2, state = 0;
            else if (state == 0) {
                if (l1 != line_len || l2 != line_blen) state = 2;
            }
        }
    }
    if ( name )
        fai_insert_index(idx, name, len, line_len, line_blen, offset);
    else
    {
        free(idx);
        return NULL;
    }
    free(name);
    return idx;
}