/*
 * Heuristically decide whether the contents of `buf` are binary.
 *
 * Returns true when:
 *  - the detected BOM kind orders after GIT_BOM_UTF8 in git_bom_t
 *    (presumably the UTF-16/UTF-32 variants -- confirm against the enum),
 *  - a NUL byte occurs anywhere after the BOM, or
 *  - non-printable bytes outnumber 1/128th of the printable bytes.
 */
bool git_buf_text_is_binary(const git_buf *buf)
{
	const char *cur = buf->ptr;
	const char *limit = buf->ptr + buf->size;
	git_bom_t bom;
	int n_print = 0, n_ctrl = 0;

	/* Start scanning right after whatever BOM was recognised. */
	cur += git_buf_text_detect_bom(&bom, buf, 0);

	if (bom > GIT_BOM_UTF8)
		return true;

	for (; cur < limit; cur++) {
		unsigned char ch = (unsigned char)*cur;

		/* A single NUL is enough to call the buffer binary. */
		if (ch == '\0')
			return true;

		/*
		 * Everything above 0x1F except DEL counts as printable, and so
		 * do the BS, ESC and FF control characters.
		 */
		if ((ch > 0x1F && ch != 127) ||
		    ch == '\b' || ch == '\033' || ch == '\014') {
			n_print++;
			continue;
		}

		/* Remaining whitespace is neutral; anything else is a strike. */
		if (!git__isspace(ch))
			n_ctrl++;
	}

	return (n_print >> 7) < n_ctrl;
}
/*
 * Fill `stats` with per-class byte counts for `buf` and report whether the
 * buffer looks binary.
 *
 * `stats` is zeroed first. The BOM kind is always recorded in stats->bom;
 * the BOM bytes themselves are excluded from the counts only when
 * `skip_bom` is true. A trailing 0x1A (EOF marker) byte is not counted.
 *
 * Returns true when at least one NUL was seen or when non-printable bytes
 * outnumber 1/128th of the printable bytes.
 */
bool git_buf_text_gather_stats(
	git_buf_text_stats *stats, const git_buf *buf, bool skip_bom)
{
	const char *cur = buf->ptr;
	const char *limit = buf->ptr + buf->size;
	int bom_len;

	memset(stats, 0, sizeof(*stats));

	/* Detect the BOM unconditionally, but only step over it on request. */
	bom_len = git_buf_text_detect_bom(&stats->bom, buf, 0);
	if (skip_bom)
		cur += bom_len;

	/* Drop a trailing EOF character from the scanned range. */
	if (buf->size > 0 && limit[-1] == '\032')
		limit--;

	for (; cur < limit; cur++) {
		unsigned char ch = (unsigned char)*cur;

		switch (ch) {
		case '\0':
			stats->nul++;
			stats->nonprintable++;
			break;
		case '\n':
			stats->lf++;
			break;
		case '\r':
			stats->cr++;
			/*
			 * Peek only: the following LF is still visited by the
			 * loop and therefore also counted in stats->lf.
			 */
			if (cur + 1 < limit && cur[1] == '\n')
				stats->crlf++;
			break;
		case '\t':
		case '\f':
		case '\v':
		case '\b':
		case 0x1b: /* ESC */
			stats->printable++;
			break;
		default:
			/* Above 0x1F and not DEL is printable; the rest is not. */
			if (ch > 0x1F && ch != 0x7F)
				stats->printable++;
			else
				stats->nonprintable++;
			break;
		}
	}

	return (stats->nul > 0 ||
		((stats->printable >> 7) < stats->nonprintable));
}
/*
 * Advance `parser` past a leading UTF-8 byte order mark, if there is one.
 *
 * Other BOM kinds are detected but deliberately left in place.
 * Always returns 0.
 */
static int skip_bom(git_parse_ctx *parser)
{
	git_buf buf = GIT_BUF_INIT_CONST(parser->content, parser->content_len);
	git_bom_t bom;
	/*
	 * A BOM can only sit at the very start of the content, so detection
	 * must begin at offset 0. Passing the content length here made the
	 * detector look at the end of the buffer, where no BOM can ever be
	 * found, so the mark was never skipped.
	 */
	int bom_offset = git_buf_text_detect_bom(&bom, &buf, 0);

	if (bom == GIT_BOM_UTF8)
		git_parse_advance_chars(parser, bom_offset);

	/* TODO: reference implementation is pretty stupid with BoM */

	return 0;
}
int git_attr_file__load( git_attr_file **out, git_repository *repo, git_attr_session *attr_session, git_attr_file_entry *entry, git_attr_file_source source, git_attr_file_parser parser) { int error = 0; git_blob *blob = NULL; git_buf content = GIT_BUF_INIT; const char *content_str; git_attr_file *file; struct stat st; bool nonexistent = false; int bom_offset; git_bom_t bom; *out = NULL; switch (source) { case GIT_ATTR_FILE__IN_MEMORY: /* in-memory attribute file doesn't need data */ break; case GIT_ATTR_FILE__FROM_INDEX: { git_oid id; git_off_t blobsize; if ((error = attr_file_oid_from_index(&id, repo, entry->path)) < 0 || (error = git_blob_lookup(&blob, repo, &id)) < 0) return error; /* Do not assume that data straight from the ODB is NULL-terminated; * copy the contents of a file to a buffer to work on */ blobsize = git_blob_rawsize(blob); GIT_ERROR_CHECK_BLOBSIZE(blobsize); git_buf_put(&content, git_blob_rawcontent(blob), (size_t)blobsize); break; } case GIT_ATTR_FILE__FROM_FILE: { int fd = -1; /* For open or read errors, pretend that we got ENOTFOUND. */ /* TODO: issue warning when warning API is available */ if (p_stat(entry->fullpath, &st) < 0 || S_ISDIR(st.st_mode) || (fd = git_futils_open_ro(entry->fullpath)) < 0 || (error = git_futils_readbuffer_fd(&content, fd, (size_t)st.st_size)) < 0) nonexistent = true; if (fd >= 0) p_close(fd); break; } default: git_error_set(GIT_ERROR_INVALID, "unknown file source %d", source); return -1; } if ((error = git_attr_file__new(&file, entry, source)) < 0) goto cleanup; /* advance over a UTF8 BOM */ content_str = git_buf_cstr(&content); bom_offset = git_buf_text_detect_bom(&bom, &content); if (bom == GIT_BOM_UTF8) content_str += bom_offset; /* store the key of the attr_reader; don't bother with cache * invalidation during the same attr reader session. 
*/ if (attr_session) file->session_key = attr_session->key; if (parser && (error = parser(repo, file, content_str)) < 0) { git_attr_file__free(file); goto cleanup; } /* write cache breakers */ if (nonexistent) file->nonexistent = 1; else if (source == GIT_ATTR_FILE__FROM_INDEX) git_oid_cpy(&file->cache_data.oid, git_blob_id(blob)); else if (source == GIT_ATTR_FILE__FROM_FILE) git_futils_filestamp_set_from_stat(&file->cache_data.stamp, &st); /* else always cacheable */ *out = file; cleanup: git_blob_free(blob); git_buf_dispose(&content); return error; }