From 4631e22f925fa2af8d8548af97ee2215be101409 Mon Sep 17 00:00:00 2001 From: Shreyansh Paliwal Date: Wed, 18 Feb 2026 23:23:40 +0530 Subject: [PATCH 01/21] wt-status: pass struct repository through function parameters Some functions in wt-status.c (count_stash_entries(), read_line_from_git_path(), abbrev_oid_in_line(), and read_rebase_todolist()) rely on the_repository as they do not have access to a local repository instance. Add a struct repository *r parameter to these functions and pass the local repository instance through the callers, which already have access to it either directly by struct repository *r or indirectly by struct wt_state *s (s->repo). Replace uses of the_repository in these functions with the passed parameter. Signed-off-by: Shreyansh Paliwal Signed-off-by: Junio C Hamano --- wt-status.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/wt-status.c b/wt-status.c index e12adb26b9f8eb..97c2e10a8cefcc 100644 --- a/wt-status.c +++ b/wt-status.c @@ -984,17 +984,17 @@ static int stash_count_refs(const char *refname UNUSED, return 0; } -static int count_stash_entries(void) +static int count_stash_entries(struct repository *r) { int n = 0; - refs_for_each_reflog_ent(get_main_ref_store(the_repository), + refs_for_each_reflog_ent(get_main_ref_store(r), "refs/stash", stash_count_refs, &n); return n; } static void wt_longstatus_print_stash_summary(struct wt_status *s) { - int stash_count = count_stash_entries(); + int stash_count = count_stash_entries(s->repo); if (stash_count > 0) status_printf_ln(s, GIT_COLOR_NORMAL, @@ -1287,10 +1287,10 @@ static void show_am_in_progress(struct wt_status *s, wt_longstatus_print_trailer(s); } -static char *read_line_from_git_path(const char *filename) +static char *read_line_from_git_path(struct repository *r, const char *filename) { struct strbuf buf = STRBUF_INIT; - FILE *fp = fopen_or_warn(repo_git_path_append(the_repository, &buf, + FILE *fp = 
fopen_or_warn(repo_git_path_append(r, &buf, "%s", filename), "r"); if (!fp) { @@ -1325,8 +1325,8 @@ static int split_commit_in_progress(struct wt_status *s) if (head_flags & REF_ISSYMREF || orig_head_flags & REF_ISSYMREF) return 0; - rebase_amend = read_line_from_git_path("rebase-merge/amend"); - rebase_orig_head = read_line_from_git_path("rebase-merge/orig-head"); + rebase_amend = read_line_from_git_path(s->repo, "rebase-merge/amend"); + rebase_orig_head = read_line_from_git_path(s->repo, "rebase-merge/orig-head"); if (!rebase_amend || !rebase_orig_head) ; /* fall through, no split in progress */ @@ -1350,7 +1350,7 @@ static int split_commit_in_progress(struct wt_status *s) * The function assumes that the line does not contain useless spaces * before or after the command. */ -static void abbrev_oid_in_line(struct strbuf *line) +static void abbrev_oid_in_line(struct repository *r, struct strbuf *line) { struct string_list split = STRING_LIST_INIT_DUP; struct object_id oid; @@ -1362,7 +1362,7 @@ static void abbrev_oid_in_line(struct strbuf *line) return; if ((2 <= string_list_split(&split, line->buf, " ", 2)) && - !repo_get_oid(the_repository, split.items[1].string, &oid)) { + !repo_get_oid(r, split.items[1].string, &oid)) { strbuf_reset(line); strbuf_addf(line, "%s ", split.items[0].string); strbuf_add_unique_abbrev(line, &oid, DEFAULT_ABBREV); @@ -1372,10 +1372,10 @@ static void abbrev_oid_in_line(struct strbuf *line) string_list_clear(&split, 0); } -static int read_rebase_todolist(const char *fname, struct string_list *lines) +static int read_rebase_todolist(struct repository *r, const char *fname, struct string_list *lines) { struct strbuf buf = STRBUF_INIT; - FILE *f = fopen(repo_git_path_append(the_repository, &buf, "%s", fname), "r"); + FILE *f = fopen(repo_git_path_append(r, &buf, "%s", fname), "r"); int ret; if (!f) { @@ -1384,7 +1384,7 @@ static int read_rebase_todolist(const char *fname, struct string_list *lines) goto out; } die_errno("Could not open 
file %s for reading", - repo_git_path_replace(the_repository, &buf, "%s", fname)); + repo_git_path_replace(r, &buf, "%s", fname)); } while (!strbuf_getline_lf(&buf, f)) { if (starts_with(buf.buf, comment_line_str)) @@ -1392,7 +1392,7 @@ static int read_rebase_todolist(const char *fname, struct string_list *lines) strbuf_trim(&buf); if (!buf.len) continue; - abbrev_oid_in_line(&buf); + abbrev_oid_in_line(r, &buf); string_list_append(lines, buf.buf); } fclose(f); @@ -1413,8 +1413,8 @@ static void show_rebase_information(struct wt_status *s, struct string_list have_done = STRING_LIST_INIT_DUP; struct string_list yet_to_do = STRING_LIST_INIT_DUP; - read_rebase_todolist("rebase-merge/done", &have_done); - if (read_rebase_todolist("rebase-merge/git-rebase-todo", + read_rebase_todolist(s->repo, "rebase-merge/done", &have_done); + if (read_rebase_todolist(s->repo, "rebase-merge/git-rebase-todo", &yet_to_do)) status_printf_ln(s, color, _("git-rebase-todo is missing.")); @@ -2259,7 +2259,7 @@ static void wt_porcelain_v2_print_tracking(struct wt_status *s) */ static void wt_porcelain_v2_print_stash(struct wt_status *s) { - int stash_count = count_stash_entries(); + int stash_count = count_stash_entries(s->repo); char eol = s->null_termination ? '\0' : '\n'; if (stash_count > 0) From 9d0d2ba217f3ceefb0315b556f012edb598b9724 Mon Sep 17 00:00:00 2001 From: Shreyansh Paliwal Date: Wed, 18 Feb 2026 23:23:41 +0530 Subject: [PATCH 02/21] wt-status: replace uses of the_repository with local repository instances wt-status.c uses the global the_repository in several places even when a repository instance is already available via struct wt_status *s or struct repository *r. Replace these uses of the_repository with the repository available in the local context (i.e. s->repo or r). 
The replacements of the_repository with s->repo mostly apply to cases where a repository instance is already available via struct wt_status *s or struct repository *r. All functions operating on struct wt_status *s are only used after s is initialized by wt_status_prepare(), which sets s->repo from the repository provided by the caller. As a result, s->repo is guaranteed to be available and consistent whenever these functions are invoked. This reduces reliance on global state and keeps wt-status consistent. Many functions operating on struct wt_status *s are called via commit.c, which still relies on the_repository, but within wt-status.c the local repository pointer refers to the same underlying repository object. Signed-off-by: Shreyansh Paliwal Signed-off-by: Junio C Hamano --- wt-status.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/wt-status.c b/wt-status.c index 97c2e10a8cefcc..b44b8377e5be12 100644 --- a/wt-status.c +++ b/wt-status.c @@ -150,11 +150,11 @@ void wt_status_prepare(struct repository *r, struct wt_status *s) s->show_untracked_files = SHOW_NORMAL_UNTRACKED_FILES; s->use_color = GIT_COLOR_UNKNOWN; s->relative_paths = 1; - s->branch = refs_resolve_refdup(get_main_ref_store(the_repository), + s->branch = refs_resolve_refdup(get_main_ref_store(r), "HEAD", 0, NULL, NULL); s->reference = "HEAD"; s->fp = stdout; - s->index_file = repo_get_index_file(the_repository); + s->index_file = repo_get_index_file(r); s->change.strdup_strings = 1; s->untracked.strdup_strings = 1; s->ignored.strdup_strings = 1; @@ -646,7 +646,7 @@ static void wt_status_collect_changes_index(struct wt_status *s) repo_init_revisions(s->repo, &rev, NULL); memset(&opt, 0, sizeof(opt)); - opt.def = s->is_initial ? empty_tree_oid_hex(the_repository->hash_algo) : s->reference; + opt.def = s->is_initial ? 
empty_tree_oid_hex(s->repo->hash_algo) : s->reference; setup_revisions(0, NULL, &rev, &opt); rev.diffopt.flags.override_submodule_config = 1; @@ -1146,7 +1146,7 @@ static void wt_longstatus_print_verbose(struct wt_status *s) rev.diffopt.ita_invisible_in_index = 1; memset(&opt, 0, sizeof(opt)); - opt.def = s->is_initial ? empty_tree_oid_hex(the_repository->hash_algo) : s->reference; + opt.def = s->is_initial ? empty_tree_oid_hex(s->repo->hash_algo) : s->reference; setup_revisions(0, NULL, &rev, &opt); rev.diffopt.output_format |= DIFF_FORMAT_PATCH; @@ -1317,9 +1317,9 @@ static int split_commit_in_progress(struct wt_status *s) !s->branch || strcmp(s->branch, "HEAD")) return 0; - if (refs_read_ref_full(get_main_ref_store(the_repository), "HEAD", RESOLVE_REF_READING | RESOLVE_REF_NO_RECURSE, + if (refs_read_ref_full(get_main_ref_store(s->repo), "HEAD", RESOLVE_REF_READING | RESOLVE_REF_NO_RECURSE, &head_oid, &head_flags) || - refs_read_ref_full(get_main_ref_store(the_repository), "ORIG_HEAD", RESOLVE_REF_READING | RESOLVE_REF_NO_RECURSE, + refs_read_ref_full(get_main_ref_store(s->repo), "ORIG_HEAD", RESOLVE_REF_READING | RESOLVE_REF_NO_RECURSE, &orig_head_oid, &orig_head_flags)) return 0; if (head_flags & REF_ISSYMREF || orig_head_flags & REF_ISSYMREF) @@ -1432,7 +1432,7 @@ static void show_rebase_information(struct wt_status *s, i++) status_printf_ln(s, color, " %s", have_done.items[i].string); if (have_done.nr > nr_lines_to_show && s->hints) { - char *path = repo_git_path(the_repository, "rebase-merge/done"); + char *path = repo_git_path(s->repo, "rebase-merge/done"); status_printf_ln(s, color, _(" (see more in file %s)"), path); free(path); @@ -1534,7 +1534,7 @@ static void show_cherry_pick_in_progress(struct wt_status *s, else status_printf_ln(s, color, _("You are currently cherry-picking commit %s."), - repo_find_unique_abbrev(the_repository, &s->state.cherry_pick_head_oid, + repo_find_unique_abbrev(s->repo, &s->state.cherry_pick_head_oid, DEFAULT_ABBREV)); if 
(s->hints) { @@ -1564,7 +1564,7 @@ static void show_revert_in_progress(struct wt_status *s, else status_printf_ln(s, color, _("You are currently reverting commit %s."), - repo_find_unique_abbrev(the_repository, &s->state.revert_head_oid, + repo_find_unique_abbrev(s->repo, &s->state.revert_head_oid, DEFAULT_ABBREV)); if (s->hints) { if (has_unmerged(s)) @@ -1691,7 +1691,7 @@ static void wt_status_get_detached_from(struct repository *r, char *ref = NULL; strbuf_init(&cb.buf, 0); - if (refs_for_each_reflog_ent_reverse(get_main_ref_store(the_repository), "HEAD", grab_1st_switch, &cb) <= 0) { + if (refs_for_each_reflog_ent_reverse(get_main_ref_store(r), "HEAD", grab_1st_switch, &cb) <= 0) { strbuf_release(&cb.buf); return; } @@ -2099,7 +2099,7 @@ static void wt_shortstatus_print_tracking(struct wt_status *s) upstream_is_gone = 1; } - short_base = refs_shorten_unambiguous_ref(get_main_ref_store(the_repository), + short_base = refs_shorten_unambiguous_ref(get_main_ref_store(s->repo), base, 0); color_fprintf(s->fp, header_color, "..."); color_fprintf(s->fp, branch_color_remote, "%s", short_base); @@ -2233,7 +2233,7 @@ static void wt_porcelain_v2_print_tracking(struct wt_status *s) ab_info = stat_tracking_info(branch, &nr_ahead, &nr_behind, &base, 0, s->ahead_behind_flags); if (base) { - base = refs_shorten_unambiguous_ref(get_main_ref_store(the_repository), + base = refs_shorten_unambiguous_ref(get_main_ref_store(s->repo), base, 0); fprintf(s->fp, "# branch.upstream %s%c", base, eol); free((char *)base); From a7cd24de0b3b679c16ae3ee8215af06aeea1e6a3 Mon Sep 17 00:00:00 2001 From: Shreyansh Paliwal Date: Wed, 18 Feb 2026 23:23:42 +0530 Subject: [PATCH 03/21] wt-status: use hash_algo from local repository instead of global the_hash_algo wt-status.c still uses the global the_hash_algo even though a repository instance is already available via struct wt_status. 
Replace uses of the_hash_algo with the hash algorithm stored in the associated repository (s->repo->hash_algo or r->hash_algo). This removes another dependency on global state and keeps wt-status consistent with local repository usage. Signed-off-by: Shreyansh Paliwal Signed-off-by: Junio C Hamano --- wt-status.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wt-status.c b/wt-status.c index b44b8377e5be12..264281fb6756f8 100644 --- a/wt-status.c +++ b/wt-status.c @@ -1815,10 +1815,10 @@ void wt_status_get_state(struct repository *r, if (!sequencer_get_last_command(r, &action)) { if (action == REPLAY_PICK && !state->cherry_pick_in_progress) { state->cherry_pick_in_progress = 1; - oidcpy(&state->cherry_pick_head_oid, null_oid(the_hash_algo)); + oidcpy(&state->cherry_pick_head_oid, null_oid(r->hash_algo)); } else if (action == REPLAY_REVERT && !state->revert_in_progress) { state->revert_in_progress = 1; - oidcpy(&state->revert_head_oid, null_oid(the_hash_algo)); + oidcpy(&state->revert_head_oid, null_oid(r->hash_algo)); } } if (get_detached_from) @@ -2630,7 +2630,7 @@ int has_uncommitted_changes(struct repository *r, * We have no head (or it's corrupt); use the empty tree, * which will complain if the index is non-empty. */ - struct tree *tree = lookup_tree(r, the_hash_algo->empty_tree); + struct tree *tree = lookup_tree(r, r->hash_algo->empty_tree); add_pending_object(&rev_info, &tree->object, ""); } From 31c771ab443352741ecc3710d54a91890a68ee79 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Mon, 2 Mar 2026 15:45:21 -0600 Subject: [PATCH 04/21] builtin/repo: update stats for each object When walking reachable objects in the repository, `count_objects()` processes a set of objects and updates the `struct object_stats`. In preparation for more granular statistics being collected, update the `struct object_stats` for each individual object instead. 
Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/repo.c | 53 +++++++++++++++++++++++--------------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/builtin/repo.c b/builtin/repo.c index 0ea045abc13f8c..c7c9f0f4974d03 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -558,8 +558,6 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, { struct count_objects_data *data = cb_data; struct object_stats *stats = data->stats; - size_t inflated_total = 0; - size_t disk_total = 0; size_t object_count; for (size_t i = 0; i < oids->nr; i++) { @@ -575,33 +573,30 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, OBJECT_INFO_QUICK) < 0) continue; - inflated_total += inflated; - disk_total += disk; - } - - switch (type) { - case OBJ_TAG: - stats->type_counts.tags += oids->nr; - stats->inflated_sizes.tags += inflated_total; - stats->disk_sizes.tags += disk_total; - break; - case OBJ_COMMIT: - stats->type_counts.commits += oids->nr; - stats->inflated_sizes.commits += inflated_total; - stats->disk_sizes.commits += disk_total; - break; - case OBJ_TREE: - stats->type_counts.trees += oids->nr; - stats->inflated_sizes.trees += inflated_total; - stats->disk_sizes.trees += disk_total; - break; - case OBJ_BLOB: - stats->type_counts.blobs += oids->nr; - stats->inflated_sizes.blobs += inflated_total; - stats->disk_sizes.blobs += disk_total; - break; - default: - BUG("invalid object type"); + switch (type) { + case OBJ_TAG: + stats->type_counts.tags++; + stats->inflated_sizes.tags += inflated; + stats->disk_sizes.tags += disk; + break; + case OBJ_COMMIT: + stats->type_counts.commits++; + stats->inflated_sizes.commits += inflated; + stats->disk_sizes.commits += disk; + break; + case OBJ_TREE: + stats->type_counts.trees++; + stats->inflated_sizes.trees += inflated; + stats->disk_sizes.trees += disk; + break; + case OBJ_BLOB: + stats->type_counts.blobs++; + stats->inflated_sizes.blobs += inflated; 
+ stats->disk_sizes.blobs += disk; + break; + default: + BUG("invalid object type"); + } } object_count = get_total_object_values(&stats->type_counts); From fa1752792711e7383376cf232eb72aac77d726d7 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Mon, 2 Mar 2026 15:45:22 -0600 Subject: [PATCH 05/21] builtin/repo: add helper for printing keyvalue output The machine-parsable formats for the git-repo(1) "structure" subcommand print output in keyvalue pairs. Introduce the helper function `print_keyvalue()` to remove some code duplication and improve readability. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/repo.c | 77 +++++++++++++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 35 deletions(-) diff --git a/builtin/repo.c b/builtin/repo.c index c7c9f0f4974d03..782194cf4c52af 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -446,44 +446,51 @@ static void stats_table_clear(struct stats_table *table) string_list_clear(&table->rows, 1); } +static inline void print_keyvalue(const char *key, char key_delim, size_t value, + char value_delim) +{ + printf("%s%c%" PRIuMAX "%c", key, key_delim, (uintmax_t)value, + value_delim); +} + static void structure_keyvalue_print(struct repo_structure *stats, char key_delim, char value_delim) { - printf("references.branches.count%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->refs.branches, value_delim); - printf("references.tags.count%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->refs.tags, value_delim); - printf("references.remotes.count%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->refs.remotes, value_delim); - printf("references.others.count%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->refs.others, value_delim); - - printf("objects.commits.count%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.type_counts.commits, value_delim); - printf("objects.trees.count%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.type_counts.trees, value_delim); - 
printf("objects.blobs.count%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.type_counts.blobs, value_delim); - printf("objects.tags.count%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.type_counts.tags, value_delim); - - printf("objects.commits.inflated_size%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.inflated_sizes.commits, value_delim); - printf("objects.trees.inflated_size%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.inflated_sizes.trees, value_delim); - printf("objects.blobs.inflated_size%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.inflated_sizes.blobs, value_delim); - printf("objects.tags.inflated_size%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.inflated_sizes.tags, value_delim); - - printf("objects.commits.disk_size%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.disk_sizes.commits, value_delim); - printf("objects.trees.disk_size%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.disk_sizes.trees, value_delim); - printf("objects.blobs.disk_size%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.disk_sizes.blobs, value_delim); - printf("objects.tags.disk_size%c%" PRIuMAX "%c", key_delim, - (uintmax_t)stats->objects.disk_sizes.tags, value_delim); + print_keyvalue("references.branches.count", key_delim, + stats->refs.branches, value_delim); + print_keyvalue("references.tags.count", key_delim, + stats->refs.tags, value_delim); + print_keyvalue("references.remotes.count", key_delim, + stats->refs.remotes, value_delim); + print_keyvalue("references.others.count", key_delim, + stats->refs.others, value_delim); + + print_keyvalue("objects.commits.count", key_delim, + stats->objects.type_counts.commits, value_delim); + print_keyvalue("objects.trees.count", key_delim, + stats->objects.type_counts.trees, value_delim); + print_keyvalue("objects.blobs.count", key_delim, + stats->objects.type_counts.blobs, value_delim); + print_keyvalue("objects.tags.count", key_delim, + 
stats->objects.type_counts.tags, value_delim); + + print_keyvalue("objects.commits.inflated_size", key_delim, + stats->objects.inflated_sizes.commits, value_delim); + print_keyvalue("objects.trees.inflated_size", key_delim, + stats->objects.inflated_sizes.trees, value_delim); + print_keyvalue("objects.blobs.inflated_size", key_delim, + stats->objects.inflated_sizes.blobs, value_delim); + print_keyvalue("objects.tags.inflated_size", key_delim, + stats->objects.inflated_sizes.tags, value_delim); + + print_keyvalue("objects.commits.disk_size", key_delim, + stats->objects.disk_sizes.commits, value_delim); + print_keyvalue("objects.trees.disk_size", key_delim, + stats->objects.disk_sizes.trees, value_delim); + print_keyvalue("objects.blobs.disk_size", key_delim, + stats->objects.disk_sizes.blobs, value_delim); + print_keyvalue("objects.tags.disk_size", key_delim, + stats->objects.disk_sizes.tags, value_delim); fflush(stdout); } From e33ac9cc9e819f9de8ffe25c165393514cc61b12 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Mon, 2 Mar 2026 15:45:23 -0600 Subject: [PATCH 06/21] builtin/repo: collect largest inflated objects The "structure" output for git-repo(1) shows the total inflated and disk sizes of reachable objects in the repository, but doesn't show the size of the largest individual objects. Since an individual object may be a large contributor to the overall repository size, it is useful for users to know the maximum size of individual objects. While iterating across objects, record the size and OID of the largest objects encountered for each object type to provide as output. Note that the default "table" output format only displays size information and not the corresponding OID. In a subsequent commit, the table format is updated to add table annotations that mention the OID. 
Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- Documentation/git-repo.adoc | 1 + builtin/repo.c | 63 +++++++++++++++++++++++++++++++++++++ t/t1901-repo-structure.sh | 28 +++++++++++++++++ 3 files changed, 92 insertions(+) diff --git a/Documentation/git-repo.adoc b/Documentation/git-repo.adoc index 7d70270dfa5716..e812e5915864ee 100644 --- a/Documentation/git-repo.adoc +++ b/Documentation/git-repo.adoc @@ -52,6 +52,7 @@ supported: * Reachable object counts categorized by type * Total inflated size of reachable objects by type * Total disk size of reachable objects by type +* Largest reachable objects in the repository by type + The output format can be chosen through the flag `--format`. Three formats are supported: diff --git a/builtin/repo.c b/builtin/repo.c index 782194cf4c52af..59d5cb25516e17 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -2,6 +2,7 @@ #include "builtin.h" #include "environment.h" +#include "hash.h" #include "hex.h" #include "odb.h" #include "parse-options.h" @@ -197,6 +198,18 @@ static int cmd_repo_info(int argc, const char **argv, const char *prefix, return print_fields(argc, argv, repo, format); } +struct object_data { + struct object_id oid; + size_t value; +}; + +struct largest_objects { + struct object_data tag_size; + struct object_data commit_size; + struct object_data tree_size; + struct object_data blob_size; +}; + struct ref_stats { size_t branches; size_t remotes; @@ -215,6 +228,7 @@ struct object_stats { struct object_values type_counts; struct object_values inflated_sizes; struct object_values disk_sizes; + struct largest_objects largest; }; struct repo_structure { @@ -371,6 +385,21 @@ static void stats_table_setup_structure(struct stats_table *table, " * %s", _("Blobs")); stats_table_size_addf(table, objects->disk_sizes.tags, " * %s", _("Tags")); + + stats_table_addf(table, ""); + stats_table_addf(table, "* %s", _("Largest objects")); + stats_table_addf(table, " * %s", _("Commits")); + 
stats_table_size_addf(table, objects->largest.commit_size.value, + " * %s", _("Maximum size")); + stats_table_addf(table, " * %s", _("Trees")); + stats_table_size_addf(table, objects->largest.tree_size.value, + " * %s", _("Maximum size")); + stats_table_addf(table, " * %s", _("Blobs")); + stats_table_size_addf(table, objects->largest.blob_size.value, + " * %s", _("Maximum size")); + stats_table_addf(table, " * %s", _("Tags")); + stats_table_size_addf(table, objects->largest.tag_size.value, + " * %s", _("Maximum size")); } static void stats_table_print_structure(const struct stats_table *table) @@ -453,6 +482,14 @@ static inline void print_keyvalue(const char *key, char key_delim, size_t value, value_delim); } +static void print_object_data(const char *key, char key_delim, + struct object_data *data, char value_delim) +{ + print_keyvalue(key, key_delim, data->value, value_delim); + printf("%s_oid%c%s%c", key, key_delim, oid_to_hex(&data->oid), + value_delim); +} + static void structure_keyvalue_print(struct repo_structure *stats, char key_delim, char value_delim) { @@ -492,6 +529,15 @@ static void structure_keyvalue_print(struct repo_structure *stats, print_keyvalue("objects.tags.disk_size", key_delim, stats->objects.disk_sizes.tags, value_delim); + print_object_data("objects.commits.max_size", key_delim, + &stats->objects.largest.commit_size, value_delim); + print_object_data("objects.trees.max_size", key_delim, + &stats->objects.largest.tree_size, value_delim); + print_object_data("objects.blobs.max_size", key_delim, + &stats->objects.largest.blob_size, value_delim); + print_object_data("objects.tags.max_size", key_delim, + &stats->objects.largest.tag_size, value_delim); + fflush(stdout); } @@ -560,6 +606,15 @@ struct count_objects_data { struct progress *progress; }; +static void check_largest(struct object_data *data, struct object_id *oid, + size_t value) +{ + if (value > data->value || is_null_oid(&data->oid)) { + oidcpy(&data->oid, oid); + data->value = 
value; + } +} + static int count_objects(const char *path UNUSED, struct oid_array *oids, enum object_type type, void *cb_data) { @@ -585,21 +640,29 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, stats->type_counts.tags++; stats->inflated_sizes.tags += inflated; stats->disk_sizes.tags += disk; + check_largest(&stats->largest.tag_size, &oids->oid[i], + inflated); break; case OBJ_COMMIT: stats->type_counts.commits++; stats->inflated_sizes.commits += inflated; stats->disk_sizes.commits += disk; + check_largest(&stats->largest.commit_size, &oids->oid[i], + inflated); break; case OBJ_TREE: stats->type_counts.trees++; stats->inflated_sizes.trees += inflated; stats->disk_sizes.trees += disk; + check_largest(&stats->largest.tree_size, &oids->oid[i], + inflated); break; case OBJ_BLOB: stats->type_counts.blobs++; stats->inflated_sizes.blobs += inflated; stats->disk_sizes.blobs += disk; + check_largest(&stats->largest.blob_size, &oids->oid[i], + inflated); break; default: BUG("invalid object type"); diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index 17ff164b0596c9..1999f325d05d00 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -52,6 +52,16 @@ test_expect_success 'empty repository' ' | * Trees | 0 B | | * Blobs | 0 B | | * Tags | 0 B | + | | | + | * Largest objects | | + | * Commits | | + | * Maximum size | 0 B | + | * Trees | | + | * Maximum size | 0 B | + | * Blobs | | + | * Maximum size | 0 B | + | * Tags | | + | * Maximum size | 0 B | EOF git repo structure >out 2>err && @@ -104,6 +114,16 @@ test_expect_success SHA1 'repository with references and objects' ' | * Trees | $(object_type_disk_usage tree true) | | * Blobs | $(object_type_disk_usage blob true) | | * Tags | $(object_type_disk_usage tag) B | + | | | + | * Largest objects | | + | * Commits | | + | * Maximum size | 223 B | + | * Trees | | + | * Maximum size | 32.29 KiB | + | * Blobs | | + | * Maximum size | 13 B | + | * Tags | | + | * 
Maximum size | 132 B | EOF git repo structure >out 2>err && @@ -138,6 +158,14 @@ test_expect_success SHA1 'keyvalue and nul format' ' objects.trees.disk_size=$(object_type_disk_usage tree) objects.blobs.disk_size=$(object_type_disk_usage blob) objects.tags.disk_size=$(object_type_disk_usage tag) + objects.commits.max_size=221 + objects.commits.max_size_oid=de3508174b5c2ace6993da67cae9be9069e2df39 + objects.trees.max_size=1335 + objects.trees.max_size_oid=09931deea9d81ec21300d3e13c74412f32eacec5 + objects.blobs.max_size=11 + objects.blobs.max_size_oid=eaeeedced46482bd4281fda5a5f05ce24854151f + objects.tags.max_size=132 + objects.tags.max_size_oid=1ee0f2b16ea37d895dbe9dbd76cd2ac70446176c EOF git repo structure --format=keyvalue >out 2>err && From e00bb8c76e18357da3a2098cdac2a3c2c312c17d Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Mon, 2 Mar 2026 15:45:24 -0600 Subject: [PATCH 07/21] builtin/repo: add OID annotations to table output The "structure" output for git-repo(1) does not show the corresponding OIDs for the largest objects in its "table" output. Update the output to include a list of OID annotations with an index to the corresponding row in the table. 
Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/repo.c | 78 +++++++++++++++++--- t/t1901-repo-structure.sh | 145 ++++++++++++++++++++------------------ 2 files changed, 143 insertions(+), 80 deletions(-) diff --git a/builtin/repo.c b/builtin/repo.c index 59d5cb25516e17..ea7f5acd3e25a7 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -238,6 +238,7 @@ struct repo_structure { struct stats_table { struct string_list rows; + struct string_list annotations; int name_col_width; int value_col_width; @@ -250,6 +251,8 @@ struct stats_table { struct stats_table_entry { char *value; const char *unit; + size_t index; + struct object_id *oid; }; static void stats_table_vaddf(struct stats_table *table, @@ -272,6 +275,12 @@ static void stats_table_vaddf(struct stats_table *table, table->name_col_width = name_width; if (!entry) return; + if (entry->oid) { + entry->index = table->annotations.nr + 1; + strbuf_addf(&buf, "[%" PRIuMAX "] %s", (uintmax_t)entry->index, + oid_to_hex(entry->oid)); + string_list_append_nodup(&table->annotations, strbuf_detach(&buf, NULL)); + } if (entry->value) { int value_width = utf8_strwidth(entry->value); if (value_width > table->value_col_width) @@ -282,6 +291,8 @@ static void stats_table_vaddf(struct stats_table *table, if (unit_width > table->unit_col_width) table->unit_col_width = unit_width; } + + strbuf_release(&buf); } static void stats_table_addf(struct stats_table *table, const char *format, ...) @@ -321,6 +332,27 @@ static void stats_table_size_addf(struct stats_table *table, size_t value, va_end(ap); } +static void stats_table_object_size_addf(struct stats_table *table, + struct object_id *oid, size_t value, + const char *format, ...) +{ + struct stats_table_entry *entry; + va_list ap; + + CALLOC_ARRAY(entry, 1); + humanise_bytes(value, &entry->value, &entry->unit, HUMANISE_COMPACT); + + /* + * A NULL OID should not have a table annotation. 
+ */ + if (!is_null_oid(oid)) + entry->oid = oid; + + va_start(ap, format); + stats_table_vaddf(table, entry, format, ap); + va_end(ap); +} + static inline size_t get_total_reference_count(struct ref_stats *stats) { return stats->branches + stats->remotes + stats->tags + stats->others; @@ -389,19 +421,29 @@ static void stats_table_setup_structure(struct stats_table *table, stats_table_addf(table, ""); stats_table_addf(table, "* %s", _("Largest objects")); stats_table_addf(table, " * %s", _("Commits")); - stats_table_size_addf(table, objects->largest.commit_size.value, - " * %s", _("Maximum size")); + stats_table_object_size_addf(table, + &objects->largest.commit_size.oid, + objects->largest.commit_size.value, + " * %s", _("Maximum size")); stats_table_addf(table, " * %s", _("Trees")); - stats_table_size_addf(table, objects->largest.tree_size.value, - " * %s", _("Maximum size")); + stats_table_object_size_addf(table, + &objects->largest.tree_size.oid, + objects->largest.tree_size.value, + " * %s", _("Maximum size")); stats_table_addf(table, " * %s", _("Blobs")); - stats_table_size_addf(table, objects->largest.blob_size.value, - " * %s", _("Maximum size")); + stats_table_object_size_addf(table, + &objects->largest.blob_size.oid, + objects->largest.blob_size.value, + " * %s", _("Maximum size")); stats_table_addf(table, " * %s", _("Tags")); - stats_table_size_addf(table, objects->largest.tag_size.value, - " * %s", _("Maximum size")); + stats_table_object_size_addf(table, + &objects->largest.tag_size.oid, + objects->largest.tag_size.value, + " * %s", _("Maximum size")); } +#define INDEX_WIDTH 4 + static void stats_table_print_structure(const struct stats_table *table) { const char *name_col_title = _("Repository structure"); @@ -420,7 +462,8 @@ static void stats_table_print_structure(const struct stats_table *table) value_col_width = title_value_width - unit_col_width; strbuf_addstr(&buf, "| "); - strbuf_utf8_align(&buf, ALIGN_LEFT, name_col_width, name_col_title); + 
strbuf_utf8_align(&buf, ALIGN_LEFT, name_col_width + INDEX_WIDTH, + name_col_title); strbuf_addstr(&buf, " | "); strbuf_utf8_align(&buf, ALIGN_LEFT, value_col_width + unit_col_width + 1, value_col_title); @@ -428,7 +471,7 @@ static void stats_table_print_structure(const struct stats_table *table) printf("%s\n", buf.buf); printf("| "); - for (int i = 0; i < name_col_width; i++) + for (int i = 0; i < name_col_width + INDEX_WIDTH; i++) putchar('-'); printf(" | "); for (int i = 0; i < value_col_width + unit_col_width + 1; i++) @@ -450,6 +493,13 @@ static void stats_table_print_structure(const struct stats_table *table) strbuf_reset(&buf); strbuf_addstr(&buf, "| "); strbuf_utf8_align(&buf, ALIGN_LEFT, name_col_width, item->string); + + if (entry && entry->oid) + strbuf_addf(&buf, " [%" PRIuMAX "]", + (uintmax_t)entry->index); + else + strbuf_addchars(&buf, ' ', INDEX_WIDTH); + strbuf_addstr(&buf, " | "); strbuf_utf8_align(&buf, ALIGN_RIGHT, value_col_width, value); strbuf_addch(&buf, ' '); @@ -458,6 +508,12 @@ static void stats_table_print_structure(const struct stats_table *table) printf("%s\n", buf.buf); } + if (table->annotations.nr) { + printf("\n"); + for_each_string_list_item(item, &table->annotations) + printf("%s\n", item->string); + } + strbuf_release(&buf); } @@ -473,6 +529,7 @@ static void stats_table_clear(struct stats_table *table) } string_list_clear(&table->rows, 1); + string_list_clear(&table->annotations, 1); } static inline void print_keyvalue(const char *key, char key_delim, size_t value, @@ -702,6 +759,7 @@ static int cmd_repo_structure(int argc, const char **argv, const char *prefix, { struct stats_table table = { .rows = STRING_LIST_INIT_DUP, + .annotations = STRING_LIST_INIT_DUP, }; enum output_format format = FORMAT_TABLE; struct repo_structure stats = { 0 }; diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index 1999f325d05d00..918af7269f8462 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -27,41 
+27,41 @@ test_expect_success 'empty repository' ' ( cd repo && cat >expect <<-\EOF && - | Repository structure | Value | - | -------------------- | ------ | - | * References | | - | * Count | 0 | - | * Branches | 0 | - | * Tags | 0 | - | * Remotes | 0 | - | * Others | 0 | - | | | - | * Reachable objects | | - | * Count | 0 | - | * Commits | 0 | - | * Trees | 0 | - | * Blobs | 0 | - | * Tags | 0 | - | * Inflated size | 0 B | - | * Commits | 0 B | - | * Trees | 0 B | - | * Blobs | 0 B | - | * Tags | 0 B | - | * Disk size | 0 B | - | * Commits | 0 B | - | * Trees | 0 B | - | * Blobs | 0 B | - | * Tags | 0 B | - | | | - | * Largest objects | | - | * Commits | | - | * Maximum size | 0 B | - | * Trees | | - | * Maximum size | 0 B | - | * Blobs | | - | * Maximum size | 0 B | - | * Tags | | - | * Maximum size | 0 B | + | Repository structure | Value | + | ------------------------ | ------ | + | * References | | + | * Count | 0 | + | * Branches | 0 | + | * Tags | 0 | + | * Remotes | 0 | + | * Others | 0 | + | | | + | * Reachable objects | | + | * Count | 0 | + | * Commits | 0 | + | * Trees | 0 | + | * Blobs | 0 | + | * Tags | 0 | + | * Inflated size | 0 B | + | * Commits | 0 B | + | * Trees | 0 B | + | * Blobs | 0 B | + | * Tags | 0 B | + | * Disk size | 0 B | + | * Commits | 0 B | + | * Trees | 0 B | + | * Blobs | 0 B | + | * Tags | 0 B | + | | | + | * Largest objects | | + | * Commits | | + | * Maximum size | 0 B | + | * Trees | | + | * Maximum size | 0 B | + | * Blobs | | + | * Maximum size | 0 B | + | * Tags | | + | * Maximum size | 0 B | EOF git repo structure >out 2>err && @@ -89,41 +89,46 @@ test_expect_success SHA1 'repository with references and objects' ' # git-rev-list(1) --disk-usage=human option printing the full # "byte/bytes" unit string instead of just "B". 
cat >expect <<-EOF && - | Repository structure | Value | - | -------------------- | ---------- | - | * References | | - | * Count | 4 | - | * Branches | 1 | - | * Tags | 1 | - | * Remotes | 1 | - | * Others | 1 | - | | | - | * Reachable objects | | - | * Count | 3.02 k | - | * Commits | 1.01 k | - | * Trees | 1.01 k | - | * Blobs | 1.01 k | - | * Tags | 1 | - | * Inflated size | 16.03 MiB | - | * Commits | 217.92 KiB | - | * Trees | 15.81 MiB | - | * Blobs | 11.68 KiB | - | * Tags | 132 B | - | * Disk size | $(object_type_disk_usage all true) | - | * Commits | $(object_type_disk_usage commit true) | - | * Trees | $(object_type_disk_usage tree true) | - | * Blobs | $(object_type_disk_usage blob true) | - | * Tags | $(object_type_disk_usage tag) B | - | | | - | * Largest objects | | - | * Commits | | - | * Maximum size | 223 B | - | * Trees | | - | * Maximum size | 32.29 KiB | - | * Blobs | | - | * Maximum size | 13 B | - | * Tags | | - | * Maximum size | 132 B | + | Repository structure | Value | + | ------------------------ | ---------- | + | * References | | + | * Count | 4 | + | * Branches | 1 | + | * Tags | 1 | + | * Remotes | 1 | + | * Others | 1 | + | | | + | * Reachable objects | | + | * Count | 3.02 k | + | * Commits | 1.01 k | + | * Trees | 1.01 k | + | * Blobs | 1.01 k | + | * Tags | 1 | + | * Inflated size | 16.03 MiB | + | * Commits | 217.92 KiB | + | * Trees | 15.81 MiB | + | * Blobs | 11.68 KiB | + | * Tags | 132 B | + | * Disk size | $(object_type_disk_usage all true) | + | * Commits | $(object_type_disk_usage commit true) | + | * Trees | $(object_type_disk_usage tree true) | + | * Blobs | $(object_type_disk_usage blob true) | + | * Tags | $(object_type_disk_usage tag) B | + | | | + | * Largest objects | | + | * Commits | | + | * Maximum size [1] | 223 B | + | * Trees | | + | * Maximum size [2] | 32.29 KiB | + | * Blobs | | + | * Maximum size [3] | 13 B | + | * Tags | | + | * Maximum size [4] | 132 B | + + [1] 0dc91eb18580102a3a216c8bfecedeba2b9f9b9a 
+ [2] 60665251ab71dbd8c18d9bf2174f4ee0d58aa06c + [3] 97d808e45116bf02103490294d3d46dad7a2ac62 + [4] 4dae4f5954f5e6feb3577cfb1b181daa3fd3afd2 EOF git repo structure >out 2>err && From 18952a1ef1a14d2fca19638118dc2eea1e24d671 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Mon, 2 Mar 2026 15:45:25 -0600 Subject: [PATCH 08/21] builtin/repo: find commit with most parents Complex merge events may produce an octopus merge where the resulting merge commit has more than two parents. While iterating through objects in the repository for git-repo-structure, identify the commit with the most parents and display it in the output. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/repo.c | 45 ++++++++++++ t/t1901-repo-structure.sh | 151 ++++++++++++++++++++------------------ 2 files changed, 123 insertions(+), 73 deletions(-) diff --git a/builtin/repo.c b/builtin/repo.c index ea7f5acd3e25a7..047f5e098d349b 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -1,6 +1,7 @@ #define USE_THE_REPOSITORY_VARIABLE #include "builtin.h" +#include "commit.h" #include "environment.h" #include "hash.h" #include "hex.h" @@ -208,6 +209,8 @@ struct largest_objects { struct object_data commit_size; struct object_data tree_size; struct object_data blob_size; + + struct object_data parent_count; }; struct ref_stats { @@ -318,6 +321,27 @@ static void stats_table_count_addf(struct stats_table *table, size_t value, va_end(ap); } +static void stats_table_object_count_addf(struct stats_table *table, + struct object_id *oid, size_t value, + const char *format, ...) +{ + struct stats_table_entry *entry; + va_list ap; + + CALLOC_ARRAY(entry, 1); + humanise_count(value, &entry->value, &entry->unit); + + /* + * A NULL OID should not have a table annotation. 
+ */ + if (!is_null_oid(oid)) + entry->oid = oid; + + va_start(ap, format); + stats_table_vaddf(table, entry, format, ap); + va_end(ap); +} + static void stats_table_size_addf(struct stats_table *table, size_t value, const char *format, ...) { @@ -425,6 +449,10 @@ static void stats_table_setup_structure(struct stats_table *table, &objects->largest.commit_size.oid, objects->largest.commit_size.value, " * %s", _("Maximum size")); + stats_table_object_count_addf(table, + &objects->largest.parent_count.oid, + objects->largest.parent_count.value, + " * %s", _("Maximum parents")); stats_table_addf(table, " * %s", _("Trees")); stats_table_object_size_addf(table, &objects->largest.tree_size.oid, @@ -595,6 +623,9 @@ static void structure_keyvalue_print(struct repo_structure *stats, print_object_data("objects.tags.max_size", key_delim, &stats->objects.largest.tag_size, value_delim); + print_object_data("objects.commits.max_parents", key_delim, + &stats->objects.largest.parent_count, value_delim); + fflush(stdout); } @@ -682,16 +713,24 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, for (size_t i = 0; i < oids->nr; i++) { struct object_info oi = OBJECT_INFO_INIT; unsigned long inflated; + struct commit *commit; + struct object *obj; + void *content; off_t disk; + int eaten; oi.sizep = &inflated; oi.disk_sizep = &disk; + oi.contentp = &content; if (odb_read_object_info_extended(data->odb, &oids->oid[i], &oi, OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK) < 0) continue; + obj = parse_object_buffer(the_repository, &oids->oid[i], type, + inflated, content, &eaten); + switch (type) { case OBJ_TAG: stats->type_counts.tags++; @@ -701,11 +740,14 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, inflated); break; case OBJ_COMMIT: + commit = object_as_type(obj, OBJ_COMMIT, 0); stats->type_counts.commits++; stats->inflated_sizes.commits += inflated; stats->disk_sizes.commits += disk; check_largest(&stats->largest.commit_size, 
&oids->oid[i], inflated); + check_largest(&stats->largest.parent_count, &oids->oid[i], + commit_list_count(commit->parents)); break; case OBJ_TREE: stats->type_counts.trees++; @@ -724,6 +766,9 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, default: BUG("invalid object type"); } + + if (!eaten) + free(content); } object_count = get_total_object_values(&stats->type_counts); diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index 918af7269f8462..d003d64a8e0b10 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -27,41 +27,42 @@ test_expect_success 'empty repository' ' ( cd repo && cat >expect <<-\EOF && - | Repository structure | Value | - | ------------------------ | ------ | - | * References | | - | * Count | 0 | - | * Branches | 0 | - | * Tags | 0 | - | * Remotes | 0 | - | * Others | 0 | - | | | - | * Reachable objects | | - | * Count | 0 | - | * Commits | 0 | - | * Trees | 0 | - | * Blobs | 0 | - | * Tags | 0 | - | * Inflated size | 0 B | - | * Commits | 0 B | - | * Trees | 0 B | - | * Blobs | 0 B | - | * Tags | 0 B | - | * Disk size | 0 B | - | * Commits | 0 B | - | * Trees | 0 B | - | * Blobs | 0 B | - | * Tags | 0 B | - | | | - | * Largest objects | | - | * Commits | | - | * Maximum size | 0 B | - | * Trees | | - | * Maximum size | 0 B | - | * Blobs | | - | * Maximum size | 0 B | - | * Tags | | - | * Maximum size | 0 B | + | Repository structure | Value | + | ------------------------- | ------ | + | * References | | + | * Count | 0 | + | * Branches | 0 | + | * Tags | 0 | + | * Remotes | 0 | + | * Others | 0 | + | | | + | * Reachable objects | | + | * Count | 0 | + | * Commits | 0 | + | * Trees | 0 | + | * Blobs | 0 | + | * Tags | 0 | + | * Inflated size | 0 B | + | * Commits | 0 B | + | * Trees | 0 B | + | * Blobs | 0 B | + | * Tags | 0 B | + | * Disk size | 0 B | + | * Commits | 0 B | + | * Trees | 0 B | + | * Blobs | 0 B | + | * Tags | 0 B | + | | | + | * Largest objects | | + | * Commits | 
| + | * Maximum size | 0 B | + | * Maximum parents | 0 | + | * Trees | | + | * Maximum size | 0 B | + | * Blobs | | + | * Maximum size | 0 B | + | * Tags | | + | * Maximum size | 0 B | EOF git repo structure >out 2>err && @@ -89,46 +90,48 @@ test_expect_success SHA1 'repository with references and objects' ' # git-rev-list(1) --disk-usage=human option printing the full # "byte/bytes" unit string instead of just "B". cat >expect <<-EOF && - | Repository structure | Value | - | ------------------------ | ---------- | - | * References | | - | * Count | 4 | - | * Branches | 1 | - | * Tags | 1 | - | * Remotes | 1 | - | * Others | 1 | - | | | - | * Reachable objects | | - | * Count | 3.02 k | - | * Commits | 1.01 k | - | * Trees | 1.01 k | - | * Blobs | 1.01 k | - | * Tags | 1 | - | * Inflated size | 16.03 MiB | - | * Commits | 217.92 KiB | - | * Trees | 15.81 MiB | - | * Blobs | 11.68 KiB | - | * Tags | 132 B | - | * Disk size | $(object_type_disk_usage all true) | - | * Commits | $(object_type_disk_usage commit true) | - | * Trees | $(object_type_disk_usage tree true) | - | * Blobs | $(object_type_disk_usage blob true) | - | * Tags | $(object_type_disk_usage tag) B | - | | | - | * Largest objects | | - | * Commits | | - | * Maximum size [1] | 223 B | - | * Trees | | - | * Maximum size [2] | 32.29 KiB | - | * Blobs | | - | * Maximum size [3] | 13 B | - | * Tags | | - | * Maximum size [4] | 132 B | + | Repository structure | Value | + | ------------------------- | ---------- | + | * References | | + | * Count | 4 | + | * Branches | 1 | + | * Tags | 1 | + | * Remotes | 1 | + | * Others | 1 | + | | | + | * Reachable objects | | + | * Count | 3.02 k | + | * Commits | 1.01 k | + | * Trees | 1.01 k | + | * Blobs | 1.01 k | + | * Tags | 1 | + | * Inflated size | 16.03 MiB | + | * Commits | 217.92 KiB | + | * Trees | 15.81 MiB | + | * Blobs | 11.68 KiB | + | * Tags | 132 B | + | * Disk size | $(object_type_disk_usage all true) | + | * Commits | $(object_type_disk_usage commit 
true) | + | * Trees | $(object_type_disk_usage tree true) | + | * Blobs | $(object_type_disk_usage blob true) | + | * Tags | $(object_type_disk_usage tag) B | + | | | + | * Largest objects | | + | * Commits | | + | * Maximum size [1] | 223 B | + | * Maximum parents [2] | 1 | + | * Trees | | + | * Maximum size [3] | 32.29 KiB | + | * Blobs | | + | * Maximum size [4] | 13 B | + | * Tags | | + | * Maximum size [5] | 132 B | [1] 0dc91eb18580102a3a216c8bfecedeba2b9f9b9a - [2] 60665251ab71dbd8c18d9bf2174f4ee0d58aa06c - [3] 97d808e45116bf02103490294d3d46dad7a2ac62 - [4] 4dae4f5954f5e6feb3577cfb1b181daa3fd3afd2 + [2] 0dc91eb18580102a3a216c8bfecedeba2b9f9b9a + [3] 60665251ab71dbd8c18d9bf2174f4ee0d58aa06c + [4] 97d808e45116bf02103490294d3d46dad7a2ac62 + [5] 4dae4f5954f5e6feb3577cfb1b181daa3fd3afd2 EOF git repo structure >out 2>err && @@ -171,6 +174,8 @@ test_expect_success SHA1 'keyvalue and nul format' ' objects.blobs.max_size_oid=eaeeedced46482bd4281fda5a5f05ce24854151f objects.tags.max_size=132 objects.tags.max_size_oid=1ee0f2b16ea37d895dbe9dbd76cd2ac70446176c + objects.commits.max_parents=1 + objects.commits.max_parents_oid=de3508174b5c2ace6993da67cae9be9069e2df39 EOF git repo structure --format=keyvalue >out 2>err && From 42e69594113d647f53d65440f2ede554570b9f40 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Mon, 2 Mar 2026 15:45:26 -0600 Subject: [PATCH 09/21] builtin/repo: find tree with most entries The size of a tree object usually corresponds with the number of entries it has. While iterating through objects in the repository for git-repo-structure, identify the tree with the most entries and display it in the output. 
Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/repo.c | 25 +++++++++++++++++++++++++ t/t1901-repo-structure.sh | 13 +++++++++---- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/builtin/repo.c b/builtin/repo.c index 047f5e098d349b..e726bb858c1580 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -16,6 +16,8 @@ #include "strbuf.h" #include "string-list.h" #include "shallow.h" +#include "tree.h" +#include "tree-walk.h" #include "utf8.h" static const char *const repo_usage[] = { @@ -211,6 +213,7 @@ struct largest_objects { struct object_data blob_size; struct object_data parent_count; + struct object_data tree_entries; }; struct ref_stats { @@ -458,6 +461,10 @@ static void stats_table_setup_structure(struct stats_table *table, &objects->largest.tree_size.oid, objects->largest.tree_size.value, " * %s", _("Maximum size")); + stats_table_object_count_addf(table, + &objects->largest.tree_entries.oid, + objects->largest.tree_entries.value, + " * %s", _("Maximum entries")); stats_table_addf(table, " * %s", _("Blobs")); stats_table_object_size_addf(table, &objects->largest.blob_size.oid, @@ -625,6 +632,8 @@ static void structure_keyvalue_print(struct repo_structure *stats, print_object_data("objects.commits.max_parents", key_delim, &stats->objects.largest.parent_count, value_delim); + print_object_data("objects.trees.max_entries", key_delim, + &stats->objects.largest.tree_entries, value_delim); fflush(stdout); } @@ -703,6 +712,20 @@ static void check_largest(struct object_data *data, struct object_id *oid, } } +static size_t count_tree_entries(struct object *obj) +{ + struct tree *t = object_as_type(obj, OBJ_TREE, 0); + struct name_entry entry; + struct tree_desc desc; + size_t count = 0; + + init_tree_desc(&desc, &t->object.oid, t->buffer, t->size); + while (tree_entry(&desc, &entry)) + count++; + + return count; +} + static int count_objects(const char *path UNUSED, struct oid_array *oids, enum object_type type, void *cb_data) { @@ 
-755,6 +778,8 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, stats->disk_sizes.trees += disk; check_largest(&stats->largest.tree_size, &oids->oid[i], inflated); + check_largest(&stats->largest.tree_entries, &oids->oid[i], + count_tree_entries(obj)); break; case OBJ_BLOB: stats->type_counts.blobs++; diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh index d003d64a8e0b10..12ed67e8468985 100755 --- a/t/t1901-repo-structure.sh +++ b/t/t1901-repo-structure.sh @@ -59,6 +59,7 @@ test_expect_success 'empty repository' ' | * Maximum parents | 0 | | * Trees | | | * Maximum size | 0 B | + | * Maximum entries | 0 | | * Blobs | | | * Maximum size | 0 B | | * Tags | | @@ -122,16 +123,18 @@ test_expect_success SHA1 'repository with references and objects' ' | * Maximum parents [2] | 1 | | * Trees | | | * Maximum size [3] | 32.29 KiB | + | * Maximum entries [4] | 1.01 k | | * Blobs | | - | * Maximum size [4] | 13 B | + | * Maximum size [5] | 13 B | | * Tags | | - | * Maximum size [5] | 132 B | + | * Maximum size [6] | 132 B | [1] 0dc91eb18580102a3a216c8bfecedeba2b9f9b9a [2] 0dc91eb18580102a3a216c8bfecedeba2b9f9b9a [3] 60665251ab71dbd8c18d9bf2174f4ee0d58aa06c - [4] 97d808e45116bf02103490294d3d46dad7a2ac62 - [5] 4dae4f5954f5e6feb3577cfb1b181daa3fd3afd2 + [4] 60665251ab71dbd8c18d9bf2174f4ee0d58aa06c + [5] 97d808e45116bf02103490294d3d46dad7a2ac62 + [6] 4dae4f5954f5e6feb3577cfb1b181daa3fd3afd2 EOF git repo structure >out 2>err && @@ -176,6 +179,8 @@ test_expect_success SHA1 'keyvalue and nul format' ' objects.tags.max_size_oid=1ee0f2b16ea37d895dbe9dbd76cd2ac70446176c objects.commits.max_parents=1 objects.commits.max_parents_oid=de3508174b5c2ace6993da67cae9be9069e2df39 + objects.trees.max_entries=42 + objects.trees.max_entries_oid=09931deea9d81ec21300d3e13c74412f32eacec5 EOF git repo structure --format=keyvalue >out 2>err && From 1dd27bfbfdc0f3b2071ecb8b505476f4caa56a13 Mon Sep 17 00:00:00 2001 From: Tian Yuchen Date: Wed, 4 Mar 2026 
22:15:26 +0800 Subject: [PATCH 10/21] setup: improve error diagnosis for invalid .git files 'read_gitfile_gently()' treats any non-regular file as 'READ_GITFILE_ERR_NOT_A_FILE' and fails to discern between 'ENOENT' and other stat failures. This flawed error reporting is noted by two 'NEEDSWORK' comments. Address these comments by introducing two new error codes: 'READ_GITFILE_ERR_MISSING' (which groups the "file missing" scenarios together) and 'READ_GITFILE_ERR_IS_A_DIR': 1. Update 'read_gitfile_error_die()' to treat 'IS_A_DIR', 'MISSING', 'NOT_A_FILE' and 'STAT_FAILED' as non-fatal no-ops. This accommodates intentional non-repo scenarios (e.g., GIT_DIR=/dev/null). 2. Explicitly catch 'NOT_A_FILE' and 'STAT_FAILED' during discovery and call 'die()' if 'die_on_error' is set. 3. Unconditionally pass '&error_code' to 'read_gitfile_gently()'. 4. Only invoke 'is_git_directory()' when we explicitly receive 'READ_GITFILE_ERR_IS_A_DIR', avoiding redundant checks. Additionally, audit external callers of 'read_gitfile_gently()' in 'submodule.c' and 'worktree.c' to accommodate the refined error codes.
Signed-off-by: Tian Yuchen Signed-off-by: Junio C Hamano --- setup.c | 47 ++++++++++++++++----- setup.h | 2 + submodule.c | 2 +- t/meson.build | 1 + t/t0009-git-dir-validation.sh | 77 +++++++++++++++++++++++++++++++++++ worktree.c | 6 ++- 6 files changed, 121 insertions(+), 14 deletions(-) create mode 100755 t/t0009-git-dir-validation.sh diff --git a/setup.c b/setup.c index b723f8b33931bd..0ae9c8846612f4 100644 --- a/setup.c +++ b/setup.c @@ -895,8 +895,10 @@ int verify_repository_format(const struct repository_format *format, void read_gitfile_error_die(int error_code, const char *path, const char *dir) { switch (error_code) { - case READ_GITFILE_ERR_STAT_FAILED: case READ_GITFILE_ERR_NOT_A_FILE: + case READ_GITFILE_ERR_STAT_FAILED: + case READ_GITFILE_ERR_MISSING: + case READ_GITFILE_ERR_IS_A_DIR: /* non-fatal; follow return path */ break; case READ_GITFILE_ERR_OPEN_FAILED: @@ -939,8 +941,14 @@ const char *read_gitfile_gently(const char *path, int *return_error_code) static struct strbuf realpath = STRBUF_INIT; if (stat(path, &st)) { - /* NEEDSWORK: discern between ENOENT vs other errors */ - error_code = READ_GITFILE_ERR_STAT_FAILED; + if (errno == ENOENT || errno == ENOTDIR) + error_code = READ_GITFILE_ERR_MISSING; + else + error_code = READ_GITFILE_ERR_STAT_FAILED; + goto cleanup_return; + } + if (S_ISDIR(st.st_mode)) { + error_code = READ_GITFILE_ERR_IS_A_DIR; goto cleanup_return; } if (!S_ISREG(st.st_mode)) { @@ -1576,20 +1584,37 @@ static enum discovery_result setup_git_directory_gently_1(struct strbuf *dir, if (offset > min_offset) strbuf_addch(dir, '/'); strbuf_addstr(dir, DEFAULT_GIT_DIR_ENVIRONMENT); - gitdirenv = read_gitfile_gently(dir->buf, die_on_error ? 
- NULL : &error_code); + gitdirenv = read_gitfile_gently(dir->buf, &error_code); if (!gitdirenv) { - if (die_on_error || - error_code == READ_GITFILE_ERR_NOT_A_FILE) { - /* NEEDSWORK: fail if .git is not file nor dir */ + switch (error_code) { + case READ_GITFILE_ERR_MISSING: + /* no .git in this directory, move on */ + break; + case READ_GITFILE_ERR_IS_A_DIR: if (is_git_directory(dir->buf)) { gitdirenv = DEFAULT_GIT_DIR_ENVIRONMENT; gitdir_path = xstrdup(dir->buf); } - } else if (error_code != READ_GITFILE_ERR_STAT_FAILED) - return GIT_DIR_INVALID_GITFILE; - } else + break; + case READ_GITFILE_ERR_STAT_FAILED: + if (die_on_error) + die(_("error reading '%s'"), dir->buf); + else + return GIT_DIR_INVALID_GITFILE; + case READ_GITFILE_ERR_NOT_A_FILE: + if (die_on_error) + die(_("not a regular file: '%s'"), dir->buf); + else + return GIT_DIR_INVALID_GITFILE; + default: + if (die_on_error) + read_gitfile_error_die(error_code, dir->buf, NULL); + else + return GIT_DIR_INVALID_GITFILE; + } + } else { gitfile = xstrdup(dir->buf); + } /* * Earlier, we tentatively added DEFAULT_GIT_DIR_ENVIRONMENT * to check that directory for a repository. 
diff --git a/setup.h b/setup.h index d55dcc66086308..1a0d010b5d2abf 100644 --- a/setup.h +++ b/setup.h @@ -36,6 +36,8 @@ int is_nonbare_repository_dir(struct strbuf *path); #define READ_GITFILE_ERR_NO_PATH 6 #define READ_GITFILE_ERR_NOT_A_REPO 7 #define READ_GITFILE_ERR_TOO_LARGE 8 +#define READ_GITFILE_ERR_MISSING 9 +#define READ_GITFILE_ERR_IS_A_DIR 10 void read_gitfile_error_die(int error_code, const char *path, const char *dir); const char *read_gitfile_gently(const char *path, int *return_error_code); #define read_gitfile(path) read_gitfile_gently((path), NULL) diff --git a/submodule.c b/submodule.c index 40a5c6fb9d1545..e733f8a669e19a 100644 --- a/submodule.c +++ b/submodule.c @@ -2413,7 +2413,7 @@ void absorb_git_dir_into_superproject(const char *path, const struct submodule *sub; struct strbuf sub_gitdir = STRBUF_INIT; - if (err_code == READ_GITFILE_ERR_STAT_FAILED) { + if (err_code == READ_GITFILE_ERR_MISSING) { /* unpopulated as expected */ strbuf_release(&gitdir); return; diff --git a/t/meson.build b/t/meson.build index 459c52a48972e4..71a6f07a11efd1 100644 --- a/t/meson.build +++ b/t/meson.build @@ -80,6 +80,7 @@ integration_tests = [ 't0006-date.sh', 't0007-git-var.sh', 't0008-ignores.sh', + 't0009-git-dir-validation.sh', 't0010-racy-git.sh', 't0012-help.sh', 't0013-sha1dc.sh', diff --git a/t/t0009-git-dir-validation.sh b/t/t0009-git-dir-validation.sh new file mode 100755 index 00000000000000..33d21ed9ea1061 --- /dev/null +++ b/t/t0009-git-dir-validation.sh @@ -0,0 +1,77 @@ +#!/bin/sh + +test_description='setup: validation of .git file/directory types + +Verify that setup_git_directory() correctly handles: +1. Valid .git directories (including symlinks to them). +2. Invalid .git files (FIFOs, sockets) by erroring out. +3. Invalid .git files (garbage) by erroring out. +' + +. 
./test-lib.sh + +test_expect_success 'setup: create parent git repository' ' + git init parent && + test_commit -C parent "root-commit" +' + +test_expect_success SYMLINKS 'setup: .git as a symlink to a directory is valid' ' + test_when_finished "rm -rf parent/link-to-dir" && + mkdir -p parent/link-to-dir && + ( + cd parent/link-to-dir && + git init real-repo && + ln -s real-repo/.git .git && + git rev-parse --git-dir >actual && + echo .git >expect && + test_cmp expect actual + ) +' + +test_expect_success PIPE 'setup: .git as a FIFO (named pipe) is rejected' ' + test_when_finished "rm -rf parent/fifo-trap" && + mkdir -p parent/fifo-trap && + ( + cd parent/fifo-trap && + mkfifo .git && + test_must_fail git rev-parse --git-dir 2>stderr && + grep "not a regular file" stderr + ) +' + +test_expect_success SYMLINKS,PIPE 'setup: .git as a symlink to a FIFO is rejected' ' + test_when_finished "rm -rf parent/symlink-fifo-trap" && + mkdir -p parent/symlink-fifo-trap && + ( + cd parent/symlink-fifo-trap && + mkfifo target-fifo && + ln -s target-fifo .git && + test_must_fail git rev-parse --git-dir 2>stderr && + grep "not a regular file" stderr + ) +' + +test_expect_success 'setup: .git with garbage content is rejected' ' + test_when_finished "rm -rf parent/garbage-trap" && + mkdir -p parent/garbage-trap && + ( + cd parent/garbage-trap && + echo "garbage" >.git && + test_must_fail git rev-parse --git-dir 2>stderr && + grep "invalid gitfile format" stderr + ) +' + +test_expect_success 'setup: .git as an empty directory is ignored' ' + test_when_finished "rm -rf parent/empty-dir" && + mkdir -p parent/empty-dir && + ( + cd parent/empty-dir && + git rev-parse --git-dir >expect && + mkdir .git && + git rev-parse --git-dir >actual && + test_cmp expect actual + ) +' + +test_done diff --git a/worktree.c b/worktree.c index 9308389cb6f029..d1165e1d1ce1ca 100644 --- a/worktree.c +++ b/worktree.c @@ -653,7 +653,8 @@ static void repair_gitfile(struct worktree *wt, } } - if (err == 
READ_GITFILE_ERR_NOT_A_FILE) + if (err == READ_GITFILE_ERR_NOT_A_FILE || + err == READ_GITFILE_ERR_IS_A_DIR) fn(1, wt->path, _(".git is not a file"), cb_data); else if (err) repair = _(".git file broken"); @@ -833,7 +834,8 @@ void repair_worktree_at_path(const char *path, strbuf_addstr(&backlink, dotgit_contents); strbuf_realpath_forgiving(&backlink, backlink.buf, 0); } - } else if (err == READ_GITFILE_ERR_NOT_A_FILE) { + } else if (err == READ_GITFILE_ERR_NOT_A_FILE || + err == READ_GITFILE_ERR_IS_A_DIR) { fn(1, dotgit.buf, _("unable to locate repository; .git is not a file"), cb_data); goto done; } else if (err == READ_GITFILE_ERR_NOT_A_REPO) { From 26b974b3a9608adee6f964e9effbac86d0220bc3 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 5 Mar 2026 18:08:54 -0500 Subject: [PATCH 11/21] check_connected(): delay opening new_pack In check_connected(), if the transport tells us we got a single packfile that has already been verified as self-contained and connected, then we can skip checking connectivity for any tips that are mentioned in that pack. This goes back to c6807a40dc (clone: open a shortcut for connectivity check, 2013-05-26). We don't need to open that pack until we are about to start sending oids to our child rev-list process, since that's when we check whether they are in the self-contained pack. Let's push the opening of that pack further down in the function. That saves us from having to clean it up when we leave the function early (and by the time we have opened the rev-list process, we never leave the function early, since we have to clean up the child process).
Signed-off-by: Jeff King Reviewed-by: Jacob Keller Signed-off-by: Junio C Hamano --- connected.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/connected.c b/connected.c index 79403108dd8f57..530357de54f890 100644 --- a/connected.c +++ b/connected.c @@ -45,20 +45,6 @@ int check_connected(oid_iterate_fn fn, void *cb_data, return err; } - if (transport && transport->smart_options && - transport->smart_options->self_contained_and_connected && - transport->pack_lockfiles.nr == 1 && - strip_suffix(transport->pack_lockfiles.items[0].string, - ".keep", &base_len)) { - struct strbuf idx_file = STRBUF_INIT; - strbuf_add(&idx_file, transport->pack_lockfiles.items[0].string, - base_len); - strbuf_addstr(&idx_file, ".idx"); - new_pack = add_packed_git(the_repository, idx_file.buf, - idx_file.len, 1); - strbuf_release(&idx_file); - } - if (repo_has_promisor_remote(the_repository)) { /* * For partial clones, we don't want to have to do a regular @@ -90,7 +76,6 @@ int check_connected(oid_iterate_fn fn, void *cb_data, promisor_pack_found: ; } while ((oid = fn(cb_data)) != NULL); - free(new_pack); return 0; } @@ -127,15 +112,27 @@ int check_connected(oid_iterate_fn fn, void *cb_data, else rev_list.no_stderr = opt->quiet; - if (start_command(&rev_list)) { - free(new_pack); + if (start_command(&rev_list)) return error(_("Could not run 'git rev-list'")); - } sigchain_push(SIGPIPE, SIG_IGN); rev_list_in = xfdopen(rev_list.in, "w"); + if (transport && transport->smart_options && + transport->smart_options->self_contained_and_connected && + transport->pack_lockfiles.nr == 1 && + strip_suffix(transport->pack_lockfiles.items[0].string, + ".keep", &base_len)) { + struct strbuf idx_file = STRBUF_INIT; + strbuf_add(&idx_file, transport->pack_lockfiles.items[0].string, + base_len); + strbuf_addstr(&idx_file, ".idx"); + new_pack = add_packed_git(the_repository, idx_file.buf, + idx_file.len, 1); + strbuf_release(&idx_file); + } + do { /* * 
If index-pack already checked that: From 0921da1724dab83ea1d266b996544674d0e318e4 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 5 Mar 2026 18:09:56 -0500 Subject: [PATCH 12/21] check_connected(): fix leak of pack-index mmap Since c6807a40dc (clone: open a shortcut for connectivity check, 2013-05-26), we may open a one-off packed_git struct to check what's in the pack we just received. At the end of the function we throw away the struct (rather than linking it into the repository struct as usual). We used to leak the struct until dd4143e7bf (connected.c: free the "struct packed_git", 2022-11-08), which calls free(). But that's not sufficient; inside the struct we'll have mmap'd the pack idx data from disk, which needs an munmap() call. Building with SANITIZE=leak doesn't detect this, because we are leaking our own mmap(), and it only finds heap allocations from malloc(). But if we use our compat mmap implementation like this: make NO_MMAP=MapsBecomeMallocs SANITIZE=leak then LSan will notice the leak, because now it's a regular heap buffer allocated by malloc(). We can fix it by calling close_pack(), which will free any associated memory. Note that we need to check for NULL ourselves; unlike free(), it is not safe to pass a NULL pointer to close_pack(). 
Signed-off-by: Jeff King Reviewed-by: Jacob Keller Signed-off-by: Junio C Hamano --- connected.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/connected.c b/connected.c index 530357de54f890..6718503649da8a 100644 --- a/connected.c +++ b/connected.c @@ -159,6 +159,9 @@ int check_connected(oid_iterate_fn fn, void *cb_data, err = error_errno(_("failed to close rev-list's stdin")); sigchain_pop(SIGPIPE); - free(new_pack); + if (new_pack) { + close_pack(new_pack); + free(new_pack); + } return finish_command(&rev_list) || err; } From e2f11392403ccb9ad304546d5be65dc917e0c95f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 5 Mar 2026 18:12:29 -0500 Subject: [PATCH 13/21] pack-revindex: avoid double-loading .rev files The usual entry point for loading the pack revindex is the load_pack_revindex() function. It returns immediately if the packed_git has a non-NULL revindex or revindex data field (representing an in-memory or mmap'd .rev file, respectively), since the data is already loaded. But in 5a6072f631 (fsck: validate .rev file header, 2023-04-17) the fsck code path switched to calling load_pack_revindex_from_disk() directly, since it wants to check the on-disk data (if there is any). But that function does _not_ check to see if the data has already been loaded; it just maps the file, overwriting the revindex_map pointer (and pointing revindex_data inside that map). And in that case we've leaked the mmap() pointed to by revindex_map (if it was non-NULL). This usually doesn't happen, since fsck wouldn't need to load the revindex for any reason before we get to these checks. But there are some cases where it does. For example, is_promisor_object() runs odb_for_each_object() with the PACK_ORDER flag, which uses the revindex. This happens a few times in our test suite, but SANITIZE=leak doesn't detect it because we are leaking an mmap(), not a heap-allocated buffer from malloc(). 
However, if you build with NO_MMAP, then our compat mmap will read into a heap buffer instead, and LSan will complain. This causes failures in t5601, t0410, t5702, and t5616. We can fix it by checking for existing revindex_data when loading from disk. This is redundant when we're called from load_pack_revindex(), but it's a cheap check. The alternative is to teach check_pack_rev_indexes() in fsck to skip the load, but that seems messier; it doesn't otherwise know about internals like revindex_map and revindex_data. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- pack-revindex.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pack-revindex.c b/pack-revindex.c index 8598b941c8c419..392eb04e0159d9 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -277,6 +277,10 @@ int load_pack_revindex_from_disk(struct packed_git *p) { char *revindex_name; int ret; + + if (p->revindex_data) + return 0; + if (open_pack_index(p)) return -1; From b68e875bec29e538a67ad38009ea1c02fa877258 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 6 Mar 2026 21:24:59 -0500 Subject: [PATCH 14/21] object-file: fix mmap() leak in odb_source_loose_read_object_stream() We mmap() a loose object file, storing the result in the local variable "mapped", which is eventually assigned into our stream struct as "st.mapped". If we hit an error, we jump to an error label which does: munmap(st.mapped, st.mapsize); to clean up. But this is wrong; we don't assign st.mapped until the end of the function, after all of the "goto error" jumps. So this munmap() is never cleaning up anything (st.mapped is always NULL, because we initialize the struct with calloc). Instead, we should feed the local variable to munmap(). This leak is due to 595296e124 (streaming: allocate stream inside the backend-specific logic, 2025-11-23), which introduced the local variable. Before that, we assigned the mmap result directly into st.mapped. 
It was probably switched there so that we do not have to allocate/free the struct when the map operation fails (e.g., because we don't have the loose object). Before that commit, the struct was passed in from the caller, so there was no allocation at all. You can see the leak in the test suite by building with: make SANITIZE=leak NO_MMAP=1 CC=clang and running t1060. We need NO_MMAP so that the mmap() is backed by an actual malloc(), which allows LSan to detect it. And the leak seems not to be detected when compiling with gcc, probably due to some internal compiler decisions about how the stack memory is written. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- object-file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/object-file.c b/object-file.c index e7e4c3348f9c1b..b156cc278b2feb 100644 --- a/object-file.c +++ b/object-file.c @@ -2150,7 +2150,7 @@ int odb_source_loose_read_object_stream(struct odb_read_stream **out, return 0; error: git_inflate_end(&st->z); - munmap(st->mapped, st->mapsize); + munmap(mapped, mapsize); free(st); return -1; } From 00611d86c66f4230eb229b2b7dc5ac413aef2221 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 5 Mar 2026 18:13:05 -0500 Subject: [PATCH 15/21] Makefile: turn on NO_MMAP when building with LSan The past few commits fixed some cases where we leak memory allocated by mmap(). Building with SANITIZE=leak doesn't detect these because it covers only heap buffers allocated by malloc(). But if we build with NO_MMAP, our compat mmap() implementation will allocate a heap buffer and pread() into it. And thus LSan will detect these leaks for free. Using NO_MMAP is less performant, of course, since we have to use extra memory and read in the whole file, rather than faulting in pages from disk. But LSan builds are already slow, and this doesn't make them measurably worse. Getting extra coverage for our leak-checking is worth it. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 8aa489f3b6812f..6b121daf1e893b 100644 --- a/Makefile +++ b/Makefile @@ -1594,6 +1594,7 @@ BASIC_CFLAGS += -DSHA1DC_FORCE_ALIGNED_ACCESS endif ifneq ($(filter leak,$(SANITIZERS)),) BASIC_CFLAGS += -O0 +NO_MMAP = CatchMapLeaks SANITIZE_LEAK = YesCompiledWithIt endif ifneq ($(filter address,$(SANITIZERS)),) From a8a69bbb64e1d25b327aed5925b1fbc086a0ba69 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 6 Mar 2026 11:25:13 -0500 Subject: [PATCH 16/21] meson: turn on NO_MMAP when building with LSan The previous commit taught the Makefile to turn on NO_MMAP in this instance. We should do the same with meson for consistency. We already do this for ASan builds, so we can just tweak one conditional. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meson.build b/meson.build index dd52efd1c87574..fc5d0f5954dab2 100644 --- a/meson.build +++ b/meson.build @@ -1417,7 +1417,7 @@ else 'getpagesize' : [], } - if get_option('b_sanitize').contains('address') + if get_option('b_sanitize').contains('address') or get_option('b_sanitize').contains('leak') libgit_c_args += '-DNO_MMAP' libgit_sources += 'compat/mmap.c' else From beca0ca4bed5d7eea405e872a197bf226c4b4a85 Mon Sep 17 00:00:00 2001 From: Omri Sarig Date: Sat, 7 Mar 2026 17:08:01 +0000 Subject: [PATCH 17/21] doc: make it easier to find custom command information Git supports creating additional commands through aliases, and through placement of executables with a "git-" prefix in the PATH. This information was not easy enough to find - users will look for this information around the command description, but the documentation exists in other locations. Update the "GIT COMMANDS" section to reference the relevant sections, making it easier for users to find this information. 
Signed-off-by: Omri Sarig Signed-off-by: Junio C Hamano --- Documentation/git.adoc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/git.adoc b/Documentation/git.adoc index ce099e78b8023e..ce7d3ce8598437 100644 --- a/Documentation/git.adoc +++ b/Documentation/git.adoc @@ -235,7 +235,10 @@ GIT COMMANDS ------------ We divide Git into high level ("porcelain") commands and low level -("plumbing") commands. +("plumbing") commands. For defining command aliases, see +linkgit:git-config[1] and look for descriptions of `alias.*`. +For installing custom "git" subcommands, see the description for +the 'PATH' environment variable in this manual. High-level commands (porcelain) ------------------------------- From d3edca979a1e916518bc2376e468609ddae2a217 Mon Sep 17 00:00:00 2001 From: Francesco Paparatto Date: Sat, 7 Mar 2026 11:36:31 +0100 Subject: [PATCH 18/21] t3310: avoid hiding failures from rev-parse in command substitutions Running `git` commands inside command substitutions like test "$(git rev-parse A)" = "$(git rev-parse B)" can hide failures from the `git` invocations and provide little diagnostic information when `test` fails. Use `test_cmp` when comparing against a stored expected value so mismatches show both expected and actual output. Use `test_cmp_rev` when comparing two revisions. These helpers produce clearer failure output, making it easier to understand what went wrong. 
Suggested-by: Eric Sunshine Signed-off-by: Francesco Paparatto Reviewed-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/t3310-notes-merge-manual-resolve.sh | 47 +++++++++++++++++---------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/t/t3310-notes-merge-manual-resolve.sh b/t/t3310-notes-merge-manual-resolve.sh index 597df5ebc0a582..8e3e71bb0953a9 100755 --- a/t/t3310-notes-merge-manual-resolve.sh +++ b/t/t3310-notes-merge-manual-resolve.sh @@ -227,7 +227,8 @@ test_expect_success 'merge z into m (== y) with default ("manual") resolver => C # Verify that current notes tree (pre-merge) has not changed (m == y) verify_notes y && verify_notes m && - test "$(git rev-parse refs/notes/m)" = "$(cat pre_merge_y)" + git rev-parse refs/notes/m >actual && + test_cmp pre_merge_y actual ' cat <expect_notes_z @@ -375,8 +376,10 @@ EOF git notes merge --commit && notes_merge_files_gone && # Merge commit has pre-merge y and pre-merge z as parents - test "$(git rev-parse refs/notes/m^1)" = "$(cat pre_merge_y)" && - test "$(git rev-parse refs/notes/m^2)" = "$(cat pre_merge_z)" && + git rev-parse refs/notes/m^1 >actual && + test_cmp pre_merge_y actual && + git rev-parse refs/notes/m^2 >actual && + test_cmp pre_merge_z actual && # Merge commit mentions the notes refs merged git log -1 --format=%B refs/notes/m > merge_commit_msg && grep -q refs/notes/m merge_commit_msg && @@ -428,14 +431,16 @@ test_expect_success 'redo merge of z into m (== y) with default ("manual") resol # Verify that current notes tree (pre-merge) has not changed (m == y) verify_notes y && verify_notes m && - test "$(git rev-parse refs/notes/m)" = "$(cat pre_merge_y)" + git rev-parse refs/notes/m >actual && + test_cmp pre_merge_y actual ' test_expect_success 'abort notes merge' ' git notes merge --abort && notes_merge_files_gone && # m has not moved (still == y) - test "$(git rev-parse refs/notes/m)" = "$(cat pre_merge_y)" && + git rev-parse refs/notes/m >actual && + test_cmp pre_merge_y actual 
&& # Verify that other notes refs has not changed (w, x, y and z) verify_notes w && verify_notes x && @@ -460,7 +465,8 @@ test_expect_success 'redo merge of z into m (== y) with default ("manual") resol # Verify that current notes tree (pre-merge) has not changed (m == y) verify_notes y && verify_notes m && - test "$(git rev-parse refs/notes/m)" = "$(cat pre_merge_y)" + git rev-parse refs/notes/m >actual && + test_cmp pre_merge_y actual ' cat <expect_notes_m @@ -500,8 +506,10 @@ EOF git notes merge --commit && notes_merge_files_gone && # Merge commit has pre-merge y and pre-merge z as parents - test "$(git rev-parse refs/notes/m^1)" = "$(cat pre_merge_y)" && - test "$(git rev-parse refs/notes/m^2)" = "$(cat pre_merge_z)" && + git rev-parse refs/notes/m^1 >actual && + test_cmp pre_merge_y actual && + git rev-parse refs/notes/m^2 >actual && + test_cmp pre_merge_z actual && # Merge commit mentions the notes refs merged git log -1 --format=%B refs/notes/m > merge_commit_msg && grep -q refs/notes/m merge_commit_msg && @@ -539,7 +547,8 @@ test_expect_success 'redo merge of z into m (== y) with default ("manual") resol # Verify that current notes tree (pre-merge) has not changed (m == y) verify_notes y && verify_notes m && - test "$(git rev-parse refs/notes/m)" = "$(cat pre_merge_y)" + git rev-parse refs/notes/m >actual && + test_cmp pre_merge_y actual ' cp expect_notes_w expect_notes_m @@ -548,7 +557,7 @@ cp expect_log_w expect_log_m test_expect_success 'reset notes ref m to somewhere else (w)' ' git update-ref refs/notes/m refs/notes/w && verify_notes m && - test "$(git rev-parse refs/notes/m)" = "$(git rev-parse refs/notes/w)" + test_cmp_rev refs/notes/m refs/notes/w ' test_expect_success 'fail to finalize conflicting merge if underlying ref has moved in the meantime (m != NOTES_MERGE_PARTIAL^1)' ' @@ -569,13 +578,15 @@ EOF test -f .git/NOTES_MERGE_WORKTREE/$commit_sha3 && test -f .git/NOTES_MERGE_WORKTREE/$commit_sha4 && # Refs are unchanged - test "$(git rev-parse 
refs/notes/m)" = "$(git rev-parse refs/notes/w)" && - test "$(git rev-parse refs/notes/y)" = "$(git rev-parse NOTES_MERGE_PARTIAL^1)" && - test "$(git rev-parse refs/notes/m)" != "$(git rev-parse NOTES_MERGE_PARTIAL^1)" && + test_cmp_rev refs/notes/m refs/notes/w && + test_cmp_rev refs/notes/y NOTES_MERGE_PARTIAL^1 && + test_cmp_rev ! refs/notes/m NOTES_MERGE_PARTIAL^1 && # Mention refs/notes/m, and its current and expected value in output test_grep -q "refs/notes/m" output && - test_grep -q "$(git rev-parse refs/notes/m)" output && - test_grep -q "$(git rev-parse NOTES_MERGE_PARTIAL^1)" output && + oid=$(git rev-parse refs/notes/m) && + test_grep -q "$oid" output && + oid=$(git rev-parse NOTES_MERGE_PARTIAL^1) && + test_grep -q "$oid" output && # Verify that other notes refs has not changed (w, x, y and z) verify_notes w && verify_notes x && @@ -587,7 +598,7 @@ test_expect_success 'resolve situation by aborting the notes merge' ' git notes merge --abort && notes_merge_files_gone && # m has not moved (still == w) - test "$(git rev-parse refs/notes/m)" = "$(git rev-parse refs/notes/w)" && + test_cmp_rev refs/notes/m refs/notes/w && # Verify that other notes refs has not changed (w, x, y and z) verify_notes w && verify_notes x && @@ -606,8 +617,8 @@ test_expect_success 'switch cwd before committing notes merge' ' test_must_fail git notes merge refs/notes/other && ( cd .git/NOTES_MERGE_WORKTREE && - echo "foo" > $(git rev-parse HEAD) && - echo "bar" >> $(git rev-parse HEAD) && + oid=$(git rev-parse HEAD) && + test_write_lines foo bar >"$oid" && git notes merge --commit ) && git notes show HEAD > actual_notes && From d1f33c753de68f63c945c3213f439081ed11c27b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 8 Mar 2026 10:57:02 +0100 Subject: [PATCH 19/21] history: initialize rev_info in cmd_history_reword() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git history reword expects a single valid revision 
argument and errors out if it doesn't get it. In that case the struct rev_info passed to release_revisions() for cleanup is still uninitialized, which can result in attempts to free(3) random pointers. Avoid that by initializing the structure. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- builtin/history.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/history.c b/builtin/history.c index 1cf6c668cfd814..88822a184fa5bc 100644 --- a/builtin/history.c +++ b/builtin/history.c @@ -425,7 +425,7 @@ static int cmd_history_reword(int argc, }; struct strbuf reflog_msg = STRBUF_INIT; struct commit *original, *rewritten; - struct rev_info revs; + struct rev_info revs = { 0 }; int ret; argc = parse_options(argc, argv, prefix, options, usage, 0); From 4c223571bef1c008c11ad5028072af862d3e7fe3 Mon Sep 17 00:00:00 2001 From: Tian Yuchen Date: Mon, 9 Mar 2026 14:51:40 +0800 Subject: [PATCH 20/21] patch-ids: document intentional const-casting in patch_id_neq() The hashmap API requires the comparison function to take const pointers. However, patch_id_neq() uses lazy evaluation to compute patch IDs on demand. As established in b3dfeebb (rebase: avoid computing unnecessary patch IDs, 2016-07-29), this avoids unnecessary work since not all objects in the hashmap will eventually be compared. Remove the ten-year-old "NEEDSWORK" comment and formally document this intentional design trade-off. Signed-off-by: Tian Yuchen Signed-off-by: Junio C Hamano --- patch-ids.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/patch-ids.c b/patch-ids.c index a5683b462c6e76..1fbc88cbec874e 100644 --- a/patch-ids.c +++ b/patch-ids.c @@ -41,7 +41,14 @@ static int patch_id_neq(const void *cmpfn_data, const struct hashmap_entry *entry_or_key, const void *keydata UNUSED) { - /* NEEDSWORK: const correctness? */ + /* + * We drop the 'const' modifier here intentionally. 
+ * + * Even though eptr and entry_or_key are const, we want to + * lazily compute their .patch_id members; see b3dfeebb (rebase: + * avoid computing unnecessary patch IDs, 2016-07-29). So we cast + * the constness away with container_of(). + */ struct diff_options *opt = (void *)cmpfn_data; struct patch_id *a, *b; From ca1db8a0f7dc0dbea892e99f5b37c5fe5861be71 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 16 Mar 2026 10:48:02 -0700 Subject: [PATCH 21/21] The 17th batch Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.54.0.adoc | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/Documentation/RelNotes/2.54.0.adoc b/Documentation/RelNotes/2.54.0.adoc index 005a98e0ff1a39..d7c67cbb078aeb 100644 --- a/Documentation/RelNotes/2.54.0.adoc +++ b/Documentation/RelNotes/2.54.0.adoc @@ -85,6 +85,9 @@ UI, Workflows & Features and various other branches listed on status.compareBranches configuration. + * "git repo structure" command learns to report maximum values on + various aspects of objects it inspects. + Performance, Internal Implementation, Development Support etc. -------------------------------------------------------------- @@ -187,6 +190,9 @@ Performance, Internal Implementation, Development Support etc. * The object source API is getting restructured to allow plugging new backends. + * Reduce dependence on the global the_hash_algo and the_repository + variables of wt-status code path. + Fixes since v2.53 ----------------- @@ -301,6 +307,22 @@ Fixes since v2.53 * "git for-each-repo" started from a secondary worktree did not work as expected, which has been corrected. (merge e87493b9b4 ds/for-each-repo-w-worktree later to maint). + + * The construct 'test "$(command)" = expectation' loses the exit + status from the command, which has been fixed by breaking up the + statement into pieces. + (merge d3edca979a fp/t3310-unhide-git-failures later to maint). 
+ + * While discovering a ".git" directory, the code treats any stat() + failure as a sign that a filesystem entity .git does not exist + there, and ignores ".git" that is not a "gitdir" file or a + directory. The code has been tightened to notice and report + filesystem corruption better. + (merge 1dd27bfbfd ty/setup-error-tightening later to maint). + + * Plug a few leaks where mmap'ed memory regions are not unmapped. + (merge a8a69bbb64 jk/unleak-mmap later to maint). + * Other code cleanup, docfix, build fix, etc. (merge d79fff4a11 jk/remote-tracking-ref-leakfix later to maint). (merge 7a747f972d dd/t5403-modernise later to maint). @@ -338,3 +360,5 @@ Fixes since v2.53 (merge a56fa1ca05 lp/doc-gitprotocol-pack-fixes later to maint). (merge 0d6bb8b541 ss/t3700-modernize later to maint). (merge 63c00a677b ss/t9123-setup-inside-test-expect-success later to maint). + (merge beca0ca4be os/doc-git-custom-commands later to maint). + (merge 4c223571be ty/patch-ids-document-lazy-eval later to maint).