faidx_t *fai_read(FILE *fp) { faidx_t *fai; char *buf, *p; int len, line_len, line_blen; #ifdef _WIN32 long offset; #else long long offset; #endif fai = (faidx_t*)calloc(1, sizeof(faidx_t)); fai->hash = kh_init(s); buf = (char*)calloc(0x10000, 1); while (!feof(fp) && fgets(buf, 0x10000, fp)) { for (p = buf; *p && isgraph(*p); ++p); *p = 0; ++p; #ifdef _WIN32 sscanf(p, "%d%ld%d%d", &len, &offset, &line_blen, &line_len); #else sscanf(p, "%d%lld%d%d", &len, &offset, &line_blen, &line_len); #endif fai_insert_index(fai, buf, len, line_len, line_blen, offset); } free(buf); return fai; }
/*
 * Scan a FASTA stream byte by byte and build an index with one entry per
 * '>' header.
 *
 * For each record the following are handed to fai_insert_index(): the name
 * (header text up to the first whitespace), the number of printable
 * characters in the sequence (len), the byte and printable-character counts
 * of the first sequence line (line_len, line_blen), and the stream position
 * just after the header line (offset).
 *
 * Line-shape tracking uses `state`:
 *   1  a header was just read; the next line defines line_len/line_blen
 *   0  sequence lines so far match the first line (also the initial state)
 *   2  one line of a different shape was seen (allowed as the last line)
 *   3  another line followed the odd-shaped one; it must hold no printable
 *      characters, and any line after that is an error
 *
 * Text before the first header is tolerated only if it contains no
 * printable characters.
 *
 * @param rz  input stream, read with razf_read()/razf_tell()
 * @return    the new index, or 0 on malformed input, on input that contains
 *            no header, or on allocation failure (a message is printed to
 *            stderr for every case except failure to allocate the index)
 */
faidx_t *fai_build_core(RAZF *rz)
{
	char c, *name;
	int l_name, m_name, ret;
	int len, line_len, line_blen, state;
	int l1, l2;
	faidx_t *idx;
	uint64_t offset;

	idx = (faidx_t*)calloc(1, sizeof(faidx_t));
	if (idx == 0) return 0;
	idx->hash = kh_init(s);
	name = 0; l_name = m_name = 0;
	len = line_len = line_blen = -1; state = 0; l1 = l2 = -1; offset = 0;

	while (razf_read(rz, &c, 1)) {
		if (c == '\n') { // an empty line
			if (state == 1) {
				// blank line right after the header: sequence starts later
				offset = razf_tell(rz);
				continue;
			} else if ((state == 0 && len < 0) || state == 2) continue;
		}
		if (c == '>') { // fasta header
			// flush the record that has just ended, if there was one
			if (len >= 0)
				fai_insert_index(idx, name, len, line_len, line_blen, offset);
			l_name = 0;
			while ((ret = razf_read(rz, &c, 1)) != 0 && !isspace((unsigned char)c)) {
				if (m_name < l_name + 2) {
					char *grown;
					m_name = l_name + 2;
					kroundup32(m_name);
					grown = (char*)realloc(name, m_name);
					if (grown == 0) {
						fprintf(stderr, "[fai_build_core] out of memory\n");
						goto fail;
					}
					name = grown;
				}
				name[l_name++] = c;
			}
			if (name == 0) {
				// empty name on the very first header: nothing was allocated
				// by the loop above, so make room for the terminator
				m_name = 2;
				name = (char*)calloc(m_name, 1);
				if (name == 0) {
					fprintf(stderr, "[fai_build_core] out of memory\n");
					goto fail;
				}
			}
			name[l_name] = '\0';
			if (ret == 0) {
				fprintf(stderr, "[fai_build_core] the last entry has no sequence\n");
				goto fail;
			}
			// discard the rest of the header line (the description)
			if (c != '\n') while (razf_read(rz, &c, 1) && c != '\n');
			state = 1; len = 0;
			offset = razf_tell(rz);
		} else {
			if (len < 0) {
				// no header seen yet: there is no record this line could
				// belong to, so accept it only if it has no printable text
				l2 = 0;
				do {
					if (isgraph((unsigned char)c)) ++l2;
				} while (razf_read(rz, &c, 1) && c != '\n');
				if (l2) {
					fprintf(stderr, "[fai_build_core] sequence data found before the first header\n");
					goto fail;
				}
				continue;
			}
			if (state == 3) {
				fprintf(stderr, "[fai_build_core] inlined empty line is not allowed in sequence '%s'.\n", name);
				goto fail;
			}
			if (state == 2) state = 3;
			// l1 counts bytes on this line, l2 counts printable characters
			l1 = l2 = 0;
			do {
				++l1;
				if (isgraph((unsigned char)c)) ++l2;
			} while ((ret = razf_read(rz, &c, 1)) && c != '\n');
			if (state == 3 && l2) {
				fprintf(stderr, "[fai_build_core] different line length in sequence '%s'.\n", name);
				goto fail;
			}
			++l1; len += l2; // l1 also accounts for the line terminator
			if (l2 >= 0x10000) {
				fprintf(stderr, "[fai_build_core] line length exceeds 65535 in sequence '%s'.\n", name);
				goto fail;
			}
			if (state == 1) {
				line_len = l1; line_blen = l2; state = 0;
			} else if (state == 0) {
				if (l1 != line_len || l2 != line_blen) state = 2;
			}
		}
	}
	if (len < 0) {
		// the input held no header at all, so there is nothing to index
		fprintf(stderr, "[fai_build_core] no sequence found in the input\n");
		goto fail;
	}
	fai_insert_index(idx, name, len, line_len, line_blen, offset);
	free(name);
	return idx;

fail:
	free(name);
	fai_destroy(idx);
	return 0;
}
/*
 * Scan a FASTA stream byte by byte and build an index with one entry per
 * '>' header.
 *
 * For each record the following are handed to fai_insert_index(): the name
 * (header text up to the first whitespace), the number of printable
 * characters in the sequence (len), the byte and printable-character counts
 * of the first sequence line (line_len, line_blen), and the position
 * reported by bgzf_utell() just after the header line (offset).
 *
 * Line-shape tracking uses `state`:
 *   1  a header was just read; the next line defines line_len/line_blen
 *   0  sequence lines so far match the first line (also the initial state)
 *   2  one line of a different shape was seen (allowed as the last line)
 *   3  another line followed the odd-shaped one; it must hold no printable
 *      characters, and any line after that is an error
 *
 * Text before the first header is tolerated only if it contains no
 * printable characters.
 *
 * @param bgzf  input stream, read with bgzf_getc(); any negative return
 *              value ends the scan
 * @return      the new index, or NULL on malformed input, on input that
 *              contains no header, or on allocation failure (a message is
 *              printed to stderr except when the initial allocations fail)
 */
faidx_t *fai_build_core(BGZF *bgzf)
{
	char *name;
	int c;
	int l_name, m_name;
	int line_len, line_blen, state;
	int l1, l2;
	faidx_t *idx;
	uint64_t offset;
	int64_t len;

	idx = (faidx_t*)calloc(1, sizeof(faidx_t));
	name = (char*)calloc(1, sizeof(char)); /* at least 1 byte, for '\0' */
	if (idx == NULL || name == NULL) {
		/* the hash has not been created yet, so plain free() is enough */
		free(name);
		free(idx);
		return NULL;
	}
	idx->hash = kh_init(s);
	l_name = m_name = 0;
	len = line_len = line_blen = -1; state = 0; l1 = l2 = -1; offset = 0;

	while ( (c=bgzf_getc(bgzf))>=0 ) {
		if (c == '\n') { // an empty line
			if (state == 1) {
				// blank line right after the header: sequence starts later
				offset = bgzf_utell(bgzf);
				continue;
			} else if ((state == 0 && len < 0) || state == 2) continue;
		}
		if (c == '>') { // fasta header
			// flush the record that has just ended, if there was one
			if (len >= 0)
				fai_insert_index(idx, name, len, line_len, line_blen, offset);
			l_name = 0;
			while ( (c=bgzf_getc(bgzf))>=0 && !isspace(c)) {
				if (m_name < l_name + 2) {
					char *grown;
					m_name = l_name + 2;
					kroundup32(m_name);
					grown = (char*)realloc(name, m_name);
					if (grown == NULL) {
						fprintf(stderr, "[fai_build_core] out of memory\n");
						goto fail;
					}
					name = grown;
				}
				name[l_name++] = c;
			}
			name[l_name] = '\0';
			if ( c<0 ) {
				fprintf(stderr, "[fai_build_core] the last entry has no sequence\n");
				goto fail;
			}
			// discard the rest of the header line (the description)
			if (c != '\n') while ( (c=bgzf_getc(bgzf))>=0 && c != '\n');
			state = 1; len = 0;
			offset = bgzf_utell(bgzf);
		} else {
			if (len < 0) {
				// no header seen yet: there is no record this line could
				// belong to, so accept it only if it has no printable text
				l2 = 0;
				do {
					if (isgraph(c)) ++l2;
				} while ( (c=bgzf_getc(bgzf))>=0 && c != '\n');
				if (l2) {
					fprintf(stderr, "[fai_build_core] sequence data found before the first header\n");
					goto fail;
				}
				continue;
			}
			if (state == 3) {
				fprintf(stderr, "[fai_build_core] inlined empty line is not allowed in sequence '%s'.\n", name);
				goto fail;
			}
			if (state == 2) state = 3;
			// l1 counts bytes on this line, l2 counts printable characters
			l1 = l2 = 0;
			do {
				++l1;
				if (isgraph(c)) ++l2;
			} while ( (c=bgzf_getc(bgzf))>=0 && c != '\n');
			if (state == 3 && l2) {
				fprintf(stderr, "[fai_build_core] different line length in sequence '%s'.\n", name);
				goto fail;
			}
			++l1; len += l2; // l1 also accounts for the line terminator
			if (state == 1) {
				line_len = l1; line_blen = l2; state = 0;
			} else if (state == 0) {
				if (l1 != line_len || l2 != line_blen) state = 2;
			}
		}
	}
	if (len < 0) {
		// the input held no header at all, so there is nothing to index
		fprintf(stderr, "[fai_build_core] no sequence found in the input\n");
		goto fail;
	}
	fai_insert_index(idx, name, len, line_len, line_blen, offset);
	free(name);
	return idx;

fail:
	free(name);
	fai_destroy(idx); // releases the hash as well, unlike a bare free(idx)
	return NULL;
}