/* This helper is the main "meat" of the editor -- it does all the work of
   writing a node record.

   Write out a node record for PATH of type KIND under EB->FS_ROOT.
   ACTION describes what is happening to the node (see enum
   svn_node_action).  Write record to writable EB->STREAM, using
   EB->BUFFER to write in chunks.

   If the node was itself copied, IS_COPY is TRUE and the path/revision
   of the copy source are in CMP_PATH/CMP_REV.  If IS_COPY is FALSE,
   yet CMP_PATH/CMP_REV are valid, this node is part of a copied
   subtree.

   NOTE(review): the record layout produced here is a sequence of
   RFC-822-style headers ("Node-path", "Node-kind", "Node-action", ...),
   a blank line, optional property content terminated by "PROPS-END",
   optional text content, and a trailing blank-line separator.  The
   ordering of the svn_stream_printf() calls below *is* the file format;
   do not reorder them. */
static svn_error_t *
dump_node(struct edit_baton *eb,
          const char *path,
          svn_node_kind_t kind,
          enum svn_node_action action,
          svn_boolean_t is_copy,
          const char *cmp_path,
          svn_revnum_t cmp_rev,
          apr_pool_t *pool)
{
  svn_stringbuf_t *propstring;
  svn_filesize_t content_length = 0;
  apr_size_t len;
  svn_boolean_t must_dump_text = FALSE, must_dump_props = FALSE;
  /* Default comparison base: same path in the previous revision.  This
     is overridden below when valid copyfrom args are supplied. */
  const char *compare_path = path;
  svn_revnum_t compare_rev = eb->current_rev - 1;
  svn_fs_root_t *compare_root = NULL;
  apr_file_t *delta_file = NULL;

  /* Maybe validate the path. */
  if (eb->verify || eb->notify_func)
    {
      svn_error_t *err = svn_fs__path_valid(path, pool);

      if (err)
        {
          if (eb->notify_func)
            {
              char errbuf[512]; /* ### svn_strerror() magic number */
              svn_repos_notify_t *notify;

              notify = svn_repos_notify_create(svn_repos_notify_warning, pool);
              notify->warning = svn_repos_notify_warning_invalid_fspath;
              notify->warning_str = apr_psprintf(
                     pool,
                     _("E%06d: While validating fspath '%s': %s"),
                     err->apr_err, path,
                     svn_err_best_message(err, errbuf, sizeof(errbuf)));
              eb->notify_func(eb->notify_baton, notify, pool);
            }

          /* Return the error in addition to notifying about it.
             When only notifying (not verifying), the error is consumed
             here and the dump continues with the invalid path. */
          if (eb->verify)
            return svn_error_trace(err);
          else
            svn_error_clear(err);
        }
    }

  /* Write out metadata headers for this file node. */
  SVN_ERR(svn_stream_printf(eb->stream, pool,
                            SVN_REPOS_DUMPFILE_NODE_PATH ": %s\n",
                            path));
  if (kind == svn_node_file)
    SVN_ERR(svn_stream_printf(eb->stream, pool,
                              SVN_REPOS_DUMPFILE_NODE_KIND ": file\n"));
  else if (kind == svn_node_dir)
    SVN_ERR(svn_stream_printf(eb->stream, pool,
                              SVN_REPOS_DUMPFILE_NODE_KIND ": dir\n"));

  /* Remove leading slashes from copyfrom paths. */
  if (cmp_path)
    cmp_path = svn_relpath_canonicalize(cmp_path, pool);

  /* Validate the comparison path/rev.  If both are usable, compare
     against the copy source rather than PATH@(current_rev - 1). */
  if (ARE_VALID_COPY_ARGS(cmp_path, cmp_rev))
    {
      compare_path = cmp_path;
      compare_rev = cmp_rev;
    }

  if (action == svn_node_action_change)
    {
      SVN_ERR(svn_stream_printf(eb->stream, pool,
                                SVN_REPOS_DUMPFILE_NODE_ACTION
                                ": change\n"));

      /* either the text or props changed, or possibly both. */
      SVN_ERR(svn_fs_revision_root(&compare_root,
                                   svn_fs_root_fs(eb->fs_root),
                                   compare_rev, pool));

      SVN_ERR(svn_fs_props_changed(&must_dump_props,
                                   compare_root, compare_path,
                                   eb->fs_root, path, pool));
      if (kind == svn_node_file)
        SVN_ERR(svn_fs_contents_changed(&must_dump_text,
                                        compare_root, compare_path,
                                        eb->fs_root, path, pool));
    }
  else if (action == svn_node_action_replace)
    {
      if (! is_copy)
        {
          /* a simple delete+add, implied by a single 'replace' action. */
          SVN_ERR(svn_stream_printf(eb->stream, pool,
                                    SVN_REPOS_DUMPFILE_NODE_ACTION
                                    ": replace\n"));

          /* definitely need to dump all content for a replace. */
          if (kind == svn_node_file)
            must_dump_text = TRUE;
          must_dump_props = TRUE;
        }
      else
        {
          /* more complex: delete original, then add-with-history. */

          /* the path & kind headers have already been printed; just
             add a delete action, and end the current record. */
          SVN_ERR(svn_stream_printf(eb->stream, pool,
                                    SVN_REPOS_DUMPFILE_NODE_ACTION
                                    ": delete\n\n"));

          /* recurse: print an additional add-with-history record.
             The recursive call emits a complete second record (headers
             plus any content) for the same PATH. */
          SVN_ERR(dump_node(eb, path, kind, svn_node_action_add,
                            is_copy, compare_path, compare_rev, pool));

          /* we can leave this routine quietly now, don't need to dump
             any content; that was already done in the second record. */
          must_dump_text = FALSE;
          must_dump_props = FALSE;
        }
    }
  else if (action == svn_node_action_delete)
    {
      SVN_ERR(svn_stream_printf(eb->stream, pool,
                                SVN_REPOS_DUMPFILE_NODE_ACTION
                                ": delete\n"));

      /* we can leave this routine quietly now, don't need to dump
         any content. */
      must_dump_text = FALSE;
      must_dump_props = FALSE;
    }
  else if (action == svn_node_action_add)
    {
      SVN_ERR(svn_stream_printf(eb->stream, pool,
                                SVN_REPOS_DUMPFILE_NODE_ACTION ": add\n"));

      if (! is_copy)
        {
          /* Dump all contents for a simple 'add'. */
          if (kind == svn_node_file)
            must_dump_text = TRUE;
          must_dump_props = TRUE;
        }
      else
        {
          /* Copy (add-with-history).  Warn if the copy source predates
             the oldest revision included in this dump: such a dump
             cannot be loaded into an empty repository. */
          if (!eb->verify && cmp_rev < eb->oldest_dumped_rev
              && eb->notify_func)
            {
              svn_repos_notify_t *notify =
                    svn_repos_notify_create(svn_repos_notify_warning, pool);

              notify->warning = svn_repos_notify_warning_found_old_reference;
              notify->warning_str = apr_psprintf(
                     pool,
                     _("Referencing data in revision %ld,"
                       " which is older than the oldest"
                       " dumped revision (r%ld). Loading this dump"
                       " into an empty repository"
                       " will fail."),
                     cmp_rev, eb->oldest_dumped_rev);
              eb->found_old_reference = TRUE;
              eb->notify_func(eb->notify_baton, notify, pool);
            }

          SVN_ERR(svn_stream_printf(eb->stream, pool,
                                    SVN_REPOS_DUMPFILE_NODE_COPYFROM_REV
                                    ": %ld\n"
                                    SVN_REPOS_DUMPFILE_NODE_COPYFROM_PATH
                                    ": %s\n",
                                    cmp_rev, cmp_path));

          SVN_ERR(svn_fs_revision_root(&compare_root,
                                       svn_fs_root_fs(eb->fs_root),
                                       compare_rev, pool));

          /* Need to decide if the copied node had any extra textual or
             property mods as well. */
          SVN_ERR(svn_fs_props_changed(&must_dump_props,
                                       compare_root, compare_path,
                                       eb->fs_root, path, pool));
          if (kind == svn_node_file)
            {
              svn_checksum_t *checksum;
              const char *hex_digest;

              SVN_ERR(svn_fs_contents_changed(&must_dump_text,
                                              compare_root, compare_path,
                                              eb->fs_root, path, pool));

              /* Emit checksums of the copy source so a loader can
                 verify it resolved the copyfrom correctly.  A NULL
                 hex_digest (checksum unavailable) simply skips the
                 header. */
              SVN_ERR(svn_fs_file_checksum(&checksum, svn_checksum_md5,
                                           compare_root, compare_path,
                                           FALSE, pool));
              hex_digest = svn_checksum_to_cstring(checksum, pool);
              if (hex_digest)
                SVN_ERR(svn_stream_printf(eb->stream, pool,
                                      SVN_REPOS_DUMPFILE_TEXT_COPY_SOURCE_MD5
                                      ": %s\n", hex_digest));

              SVN_ERR(svn_fs_file_checksum(&checksum, svn_checksum_sha1,
                                           compare_root, compare_path,
                                           FALSE, pool));
              hex_digest = svn_checksum_to_cstring(checksum, pool);
              if (hex_digest)
                SVN_ERR(svn_stream_printf(eb->stream, pool,
                                      SVN_REPOS_DUMPFILE_TEXT_COPY_SOURCE_SHA1
                                      ": %s\n", hex_digest));
            }
        }
    }

  if ((! must_dump_text) && (! must_dump_props))
    {
      /* If we're not supposed to dump text or props, so be it, we can
         just go home.  However, if either one needs to be dumped,
         then our dumpstream format demands that at a *minimum*, we
         see a lone "PROPS-END" as a divider between text and props
         content within the content-block. */
      len = 2;
      return svn_stream_write(eb->stream, "\n\n", &len); /* ### needed? */
    }

  /*** Start prepping content to dump... ***/

  /* If we are supposed to dump properties, write out a property
     length header and generate a stringbuf that contains those
     property values here. */
  if (must_dump_props)
    {
      apr_hash_t *prophash, *oldhash = NULL;
      apr_size_t proplen;
      svn_stream_t *propstream;

      SVN_ERR(svn_fs_node_proplist(&prophash, eb->fs_root, path, pool));

      /* If this is a partial dump, then issue a warning if we dump mergeinfo
         properties that refer to revisions older than the first revision
         dumped. */
      if (!eb->verify && eb->notify_func && eb->oldest_dumped_rev > 1)
        {
          svn_string_t *mergeinfo_str = apr_hash_get(prophash,
                                                     SVN_PROP_MERGEINFO,
                                                     APR_HASH_KEY_STRING);
          if (mergeinfo_str)
            {
              svn_mergeinfo_t mergeinfo, old_mergeinfo;

              SVN_ERR(svn_mergeinfo_parse(&mergeinfo, mergeinfo_str->data,
                                          pool));
              /* Keep only the ranges that fall entirely before the
                 oldest dumped revision; if any remain, warn. */
              SVN_ERR(svn_mergeinfo__filter_mergeinfo_by_ranges(
                        &old_mergeinfo, mergeinfo,
                        eb->oldest_dumped_rev - 1, 0,
                        TRUE, pool, pool));
              if (apr_hash_count(old_mergeinfo))
                {
                  svn_repos_notify_t *notify =
                    svn_repos_notify_create(svn_repos_notify_warning, pool);

                  notify->warning = svn_repos_notify_warning_found_old_mergeinfo;
                  notify->warning_str = apr_psprintf(
                    pool,
                    _("Mergeinfo referencing revision(s) prior "
                      "to the oldest dumped revision (r%ld). "
                      "Loading this dump may result in invalid "
                      "mergeinfo."),
                    eb->oldest_dumped_rev);

                  eb->found_old_mergeinfo = TRUE;
                  eb->notify_func(eb->notify_baton, notify, pool);
                }
            }
        }

      if (eb->use_deltas && compare_root)
        {
          /* Fetch the old property hash to diff against and output a header
             saying that our property contents are a delta. */
          SVN_ERR(svn_fs_node_proplist(&oldhash, compare_root, compare_path,
                                       pool));
          SVN_ERR(svn_stream_printf(eb->stream, pool,
                                    SVN_REPOS_DUMPFILE_PROP_DELTA
                                    ": true\n"));
        }
      else
        /* Non-delta mode: diff against an empty hash so the full
           property list is serialized. */
        oldhash = apr_hash_make(pool);

      /* Serialize the (possibly incremental) property list into an
         in-memory buffer first, so its exact byte length is known
         before any content is written. */
      propstring = svn_stringbuf_create_ensure(0, pool);
      propstream = svn_stream_from_stringbuf(propstring, pool);
      SVN_ERR(svn_hash_write_incremental(prophash, oldhash, propstream,
                                         "PROPS-END", pool));
      SVN_ERR(svn_stream_close(propstream));
      proplen = propstring->len;
      content_length += proplen;
      SVN_ERR(svn_stream_printf(eb->stream, pool,
                                SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH
                                ": %" APR_SIZE_T_FMT "\n", proplen));
    }

  /* If we are supposed to dump text, write out a text length header
     here, and an MD5 checksum (if available). */
  if (must_dump_text && (kind == svn_node_file))
    {
      svn_checksum_t *checksum;
      const char *hex_digest;
      svn_filesize_t textlen;

      if (eb->use_deltas)
        {
          /* Compute the text delta now and write it into a temporary
             file, so that we can find its length.  Output a header
             saying our text contents are a delta. */
          SVN_ERR(store_delta(&delta_file, &textlen, compare_root,
                              compare_path, eb->fs_root, path, pool));
          SVN_ERR(svn_stream_printf(eb->stream, pool,
                                    SVN_REPOS_DUMPFILE_TEXT_DELTA
                                    ": true\n"));

          if (compare_root)
            {
              /* Checksums of the delta base, so the loader can verify
                 it is applying the delta to the right text. */
              SVN_ERR(svn_fs_file_checksum(&checksum, svn_checksum_md5,
                                           compare_root, compare_path,
                                           FALSE, pool));
              hex_digest = svn_checksum_to_cstring(checksum, pool);
              if (hex_digest)
                SVN_ERR(svn_stream_printf(eb->stream, pool,
                                      SVN_REPOS_DUMPFILE_TEXT_DELTA_BASE_MD5
                                      ": %s\n", hex_digest));

              SVN_ERR(svn_fs_file_checksum(&checksum, svn_checksum_sha1,
                                           compare_root, compare_path,
                                           FALSE, pool));
              hex_digest = svn_checksum_to_cstring(checksum, pool);
              if (hex_digest)
                SVN_ERR(svn_stream_printf(eb->stream, pool,
                                      SVN_REPOS_DUMPFILE_TEXT_DELTA_BASE_SHA1
                                      ": %s\n", hex_digest));
            }
        }
      else
        {
          /* Just fetch the length of the file. */
          SVN_ERR(svn_fs_file_length(&textlen, eb->fs_root, path, pool));
        }

      content_length += textlen;
      SVN_ERR(svn_stream_printf(eb->stream, pool,
                                SVN_REPOS_DUMPFILE_TEXT_CONTENT_LENGTH
                                ": %" SVN_FILESIZE_T_FMT "\n", textlen));

      /* Checksums of the node's full text (the delta result, when in
         delta mode). */
      SVN_ERR(svn_fs_file_checksum(&checksum, svn_checksum_md5,
                                   eb->fs_root, path, FALSE, pool));
      hex_digest = svn_checksum_to_cstring(checksum, pool);
      if (hex_digest)
        SVN_ERR(svn_stream_printf(eb->stream, pool,
                                  SVN_REPOS_DUMPFILE_TEXT_CONTENT_MD5
                                  ": %s\n", hex_digest));

      SVN_ERR(svn_fs_file_checksum(&checksum, svn_checksum_sha1,
                                   eb->fs_root, path, FALSE, pool));
      hex_digest = svn_checksum_to_cstring(checksum, pool);
      if (hex_digest)
        SVN_ERR(svn_stream_printf(eb->stream, pool,
                                  SVN_REPOS_DUMPFILE_TEXT_CONTENT_SHA1
                                  ": %s\n", hex_digest));
    }

  /* 'Content-length:' is the last header before we dump the content,
     and is the sum of the text and prop contents lengths.  We write
     this only for the benefit of non-Subversion RFC-822 parsers. */
  SVN_ERR(svn_stream_printf(eb->stream, pool,
                            SVN_REPOS_DUMPFILE_CONTENT_LENGTH
                            ": %" SVN_FILESIZE_T_FMT "\n\n",
                            content_length));

  /* Dump property content if we're supposed to do so. */
  if (must_dump_props)
    {
      len = propstring->len;
      SVN_ERR(svn_stream_write(eb->stream, propstring->data, &len));
    }

  /* Dump text content */
  if (must_dump_text && (kind == svn_node_file))
    {
      svn_stream_t *contents;

      if (delta_file)
        {
          /* Make sure to close the underlying file when the stream is
             closed. */
          contents = svn_stream_from_aprfile2(delta_file, FALSE, pool);
        }
      else
        SVN_ERR(svn_fs_file_contents(&contents, eb->fs_root, path, pool));

      /* Disown eb->stream so copying the content does not close the
         dump stream itself. */
      SVN_ERR(svn_stream_copy3(contents, svn_stream_disown(eb->stream, pool),
                               NULL, NULL, pool));
    }

  len = 2;
  return svn_stream_write(eb->stream, "\n\n", &len); /* ### needed? */
}
/* Feed a chunk of incoming packfile DATA (SIZE bytes) to the indexer.
 *
 * The function is resumable: it appends the chunk to the on-disk pack,
 * then parses as many complete objects as the accumulated data allows.
 * Whenever the data runs out mid-entry (GIT_EBUFS from a parse step),
 * it rewinds idx->off to the start of the unfinished entry, returns 0,
 * and picks up from the saved state (have_stream/have_delta/off) on the
 * next call.  STATS is updated and reported via the progress callback
 * as objects are consumed.
 *
 * Returns 0 on success (including "need more data"), or a negative
 * error code; on hard errors all mwindows are released first.
 */
int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_transfer_progress *stats)
{
	int error = -1;
	size_t processed;
	struct git_pack_header *hdr = &idx->hdr;
	git_mwindow_file *mwf = &idx->pack->mwf;

	assert(idx && data && stats);

	/* Resume the object count from the stats carried across calls. */
	processed = stats->indexed_objects;

	if ((error = append_to_pack(idx, data, size)) < 0)
		return error;

	/* Fold this chunk into the running pack trailer hash.
	   NOTE(review): size is narrowed to int here -- presumably chunks
	   are always < 2GB; confirm against callers. */
	hash_partially(idx, (unsigned char*) data, (int)size);

	/* Make sure we set the new size of the pack */
	idx->pack->mwf.size += size;

	/* One-time setup once enough bytes have arrived for the pack
	   header: object count, caches, vectors, and stats reset. */
	if (!idx->parsed_header) {
		unsigned int total_objects;

		/* Not even a full header yet; wait for more data. */
		if ((unsigned)idx->pack->mwf.size < sizeof(struct git_pack_header))
			return 0;

		if ((error = parse_header(&idx->hdr, idx->pack)) < 0)
			return error;

		idx->parsed_header = 1;
		idx->nr_objects = ntohl(hdr->hdr_entries);
		idx->off = sizeof(struct git_pack_header);

		/* for now, limit to 2^32 objects */
		assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects));
		/* Release builds (assert compiled out): clamp instead. */
		if (idx->nr_objects == (size_t)((unsigned int)idx->nr_objects))
			total_objects = (unsigned int)idx->nr_objects;
		else
			total_objects = UINT_MAX;

		idx->pack->idx_cache = git_oidmap_alloc();
		GITERR_CHECK_ALLOC(idx->pack->idx_cache);

		idx->pack->has_cache = 1;
		if (git_vector_init(&idx->objects, total_objects, objects_cmp) < 0)
			return -1;

		/* Heuristic initial capacity: assume about half the entries
		   are deltas. */
		if (git_vector_init(&idx->deltas, total_objects / 2, NULL) < 0)
			return -1;

		stats->received_objects = 0;
		stats->local_objects = 0;
		stats->total_deltas = 0;
		stats->indexed_deltas = 0;
		processed = stats->indexed_objects = 0;
		stats->total_objects = total_objects;

		if ((error = do_progress_callback(idx, stats)) != 0)
			return error;
	}

	/* Now that we have data in the pack, let's try to parse it */

	/* As the file grows any windows we try to use will be out of date */
	git_mwindow_free_all(mwf);

	while (processed < idx->nr_objects) {
		git_packfile_stream *stream = &idx->stream;
		git_off_t entry_start = idx->off;
		size_t entry_size;
		git_otype type;
		git_mwindow *w = NULL;

		/* Keep a 20-byte tail in reserve (the pack's trailing SHA-1);
		   anything closer to the end cannot be a complete entry yet. */
		if (idx->pack->mwf.size <= idx->off + 20)
			return 0;

		/* Start a new entry unless we are resuming one left half-read
		   by a previous call. */
		if (!idx->have_stream) {
			error = git_packfile_unpack_header(&entry_size, &type, mwf, &w, &idx->off);
			if (error == GIT_EBUFS) {
				/* Header incomplete: rewind and wait for more data. */
				idx->off = entry_start;
				return 0;
			}
			if (error < 0)
				goto on_error;

			git_mwindow_close(&w);
			idx->entry_start = entry_start;
			git_hash_init(&idx->hash_ctx);

			if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA) {
				/* Skip past the delta's base reference (base OID or
				   negative offset) before the compressed payload. */
				error = advance_delta_offset(idx, type);
				if (error == GIT_EBUFS) {
					idx->off = entry_start;
					return 0;
				}
				if (error < 0)
					goto on_error;

				idx->have_delta = 1;
			} else {
				idx->have_delta = 0;
				/* Non-delta objects are hashed with their canonical
				   "<type> <size>\0" loose-object header. */
				hash_header(&idx->hash_ctx, entry_size, type);
			}

			idx->have_stream = 1;

			error = git_packfile_stream_open(stream, idx->pack, idx->off);
			if (error < 0)
				goto on_error;
		}

		/* Deltas are buffered for later resolution; full objects are
		   hashed as they stream by. */
		if (idx->have_delta) {
			error = read_object_stream(idx, stream);
		} else {
			error = hash_object_stream(idx, stream);
		}

		/* Persist the stream position before deciding whether to
		   suspend; on GIT_EBUFS the open stream is kept for resuming. */
		idx->off = stream->curpos;
		if (error == GIT_EBUFS)
			return 0;

		/* We want to free the stream resources no matter what here */
		idx->have_stream = 0;
		git_packfile_stream_free(stream);

		if (error < 0)
			goto on_error;

		if (idx->have_delta) {
			error = store_delta(idx);
		} else {
			error = store_object(idx);
		}

		if (error < 0)
			goto on_error;

		/* Deltas don't count as indexed until they are resolved. */
		if (!idx->have_delta) {
			stats->indexed_objects = (unsigned int)++processed;
		}

		stats->received_objects++;

		/* A non-zero callback return aborts the indexing operation. */
		if ((error = do_progress_callback(idx, stats)) != 0)
			goto on_error;
	}

	return 0;

on_error:
	git_mwindow_free_all(mwf);
	return error;
}