From 8b5636a57ff078ac368f246f813b156552a0726c Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Fri, 17 Oct 2025 18:37:52 +0200 Subject: [PATCH 001/553] Revert "gitk: Only restore window size from ~/.gitk, not position" This reverts commit b9bee11526ec (gitk: Only restore window size from ~/.gitk, not position, 2008-03-10). The earlier commit e9937d2a03a4 (Make gitk work reasonably well on Cygwin, 2007-02-01) reworked the window layout considerably. Much of this became irrelevant around 2011 after Cygwin gained an X11 server and switched to a supportable port of the Unix/X11 Tcl/Tk (it is now on the current 8.6 code base). Part of the necessary change was to restore the window size across sessions, but the position was also restored. This raised complaints on the mailing list[*], because Gitk was opened on the wrong monitor. b9bee11526ec was the compromise, because it was only the size that mattered for the Cygwin layout engine to work. I personally, find it annoying when Gitk pops up on a random location on the screen, in particular, since many other applications restore the window positions across sessions, so why not Gitk as well? (I do not operate multi-monitor setups, so I cannot test the case.) [*] https://lore.kernel.org/git/47AAA254.2020008@thorn.ws/ Helped-by: Mark Levedahl Signed-off-by: Johannes Sixt --- gitk | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/gitk b/gitk index 6e4d71d5852533..275f3538117f31 100755 --- a/gitk +++ b/gitk @@ -2764,17 +2764,9 @@ proc makewindow {} { .pwbottom add .bright .ctop add .pwbottom - # restore window width & height if known + # restore window position if known if {[info exists geometry(main)]} { - if {[scan $geometry(main) "%dx%d" w h] >= 2} { - if {$w > [winfo screenwidth .]} { - set w [winfo screenwidth .] - } - if {$h > [winfo screenheight .]} { - set h [winfo screenheight .] - } - wm geometry . "${w}x$h" - } + wm geometry . "$geometry(main)" } if {[info exists geometry(state)] && $geometry(state) eq "zoomed"} { From bf5a55ac5eaef91e87470d704613e6942500a810 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Fri, 17 Oct 2025 18:38:11 +0200 Subject: [PATCH 002/553] gitk: persist position and size of the Tags and Heads window The Tags and Heads window always opens at a default position and size, requiring users to reposition it each time. Remember its geometry between sessions in the config file as `geometry(showrefs)`. Note that the existing configuration is sourced in proc savestuff right before new settings are written. This makes the old settings available as local variables(!) and does not overwrite the current settings. Since we need access to the global geometry(showrefs), it is necessary to unset the local variable. Helped-by: Michael Rappazzo Signed-off-by: Johannes Sixt --- gitk | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/gitk b/gitk index 275f3538117f31..ed616613ae3c91 100755 --- a/gitk +++ b/gitk @@ -2131,12 +2131,14 @@ proc ttk_toplevel {w args} { return $w } -proc make_transient {window origin} { +proc make_transient {window origin {geometry ""}} { wm transient $window $origin - # Windows fails to place transient windows normally, so - # schedule a callback to center them on the parent. - if {[tk windowingsystem] eq {win32}} { + if {$geometry ne ""} { + after idle [list wm geometry $window $geometry] + } elseif {[tk windowingsystem] eq {win32}} { + # Windows fails to place transient windows normally, so + # schedule a callback to center them on the parent. after idle [list tk::PlaceWindow $window widget $origin] } } @@ -3106,6 +3108,11 @@ proc savestuff {w} { puts $f "set geometry(pwsash1) \"[.tf.histframe.pwclist sashpos 1] 1\"" puts $f "set geometry(botwidth) [winfo width .bleft]" puts $f "set geometry(botheight) [winfo height .bleft]" + unset -nocomplain geometry + global geometry + if {[info exists geometry(showrefs)]} { + puts $f "set geometry(showrefs) $geometry(showrefs)" + } array set view_save {} array set views {} @@ -10193,6 +10200,7 @@ proc rmbranch {} { proc showrefs {} { global showrefstop bgcolor fgcolor selectbgcolor global bglist fglist reflistfilter reflist maincursor + global geometry set top .showrefs set showrefstop $top @@ -10203,7 +10211,11 @@ proc showrefs {} { } ttk_toplevel $top wm title $top [mc "Tags and heads: %s" [file tail [pwd]]] - make_transient $top . + if {[info exists geometry(showrefs)]} { + make_transient $top . $geometry(showrefs) + } else { + make_transient $top . + } text $top.list -background $bgcolor -foreground $fgcolor \ -selectbackground $selectbgcolor -font mainfont \ -xscrollcommand "$top.xsb set" -yscrollcommand "$top.ysb set" \ @@ -10239,6 +10251,9 @@ proc showrefs {} { bind $top.list {sel_reflist %W %x %y; break} set reflist {} refill_reflist + # avoid being bound to child windows + bindtags $top [linsert [bindtags $top] 1 bind$top] + bind bind$top {set geometry(showrefs) [wm geometry %W]} } proc sel_reflist {w x y} { From e78ab370545d81a950fa3b2701dd7c72015ee802 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 30 Oct 2025 11:38:38 +0100 Subject: [PATCH 003/553] packfile: use a `strmap` to store packs by name To allow fast lookups of a packfile by name we use a hashmap that has the packfile name as key and the pack itself as value. But while this is the perfect use case for a `strmap`, we instead use `struct hashmap` and store the hashmap entry in the packfile itself. Simplify the code by using a `strmap` instead. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 24 ++++-------------------- packfile.h | 4 ++-- 2 files changed, 6 insertions(+), 22 deletions(-) diff --git a/packfile.c b/packfile.c index 1ae2b2fe1eda77..04649e52920a1f 100644 --- a/packfile.c +++ b/packfile.c @@ -788,8 +788,7 @@ void packfile_store_add_pack(struct packfile_store *store, pack->next = store->packs; store->packs = pack; - hashmap_entry_init(&pack->packmap_ent, strhash(pack->pack_name)); - hashmap_add(&store->map, &pack->packmap_ent); + strmap_put(&store->packs_by_path, pack->pack_name, pack); } struct packed_git *packfile_store_load_pack(struct packfile_store *store, @@ -806,8 +805,7 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store, strbuf_strip_suffix(&key, ".idx"); strbuf_addstr(&key, ".pack"); - p = hashmap_get_entry_from_hash(&store->map, strhash(key.buf), key.buf, - struct packed_git, packmap_ent); + p = strmap_get(&store->packs_by_path, key.buf); if (!p) { p = add_packed_git(store->odb->repo, idx_path, strlen(idx_path), local); @@ -2311,27 +2309,13 @@ int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *l return 0; } -static int pack_map_entry_cmp(const void *cmp_data UNUSED, - const struct hashmap_entry *entry, - const struct hashmap_entry *entry2, - const void *keydata) -{ - const char *key = keydata; - const struct packed_git *pg1, *pg2; - - pg1 = container_of(entry, const struct packed_git, packmap_ent); - pg2 = container_of(entry2, const struct packed_git, packmap_ent); - - return strcmp(pg1->pack_name, key ? key : pg2->pack_name); -} - struct packfile_store *packfile_store_new(struct object_database *odb) { struct packfile_store *store; CALLOC_ARRAY(store, 1); store->odb = odb; INIT_LIST_HEAD(&store->mru); - hashmap_init(&store->map, pack_map_entry_cmp, NULL, 0); + strmap_init(&store->packs_by_path); return store; } @@ -2341,7 +2325,7 @@ void packfile_store_free(struct packfile_store *store) next = p->next; free(p); } - hashmap_clear(&store->map); + strmap_clear(&store->packs_by_path, 0); free(store); } diff --git a/packfile.h b/packfile.h index c9d0b93446b5f5..9da7f14317b02c 100644 --- a/packfile.h +++ b/packfile.h @@ -5,12 +5,12 @@ #include "object.h" #include "odb.h" #include "oidset.h" +#include "strmap.h" /* in odb.h */ struct object_info; struct packed_git { - struct hashmap_entry packmap_ent; struct packed_git *next; struct list_head mru; struct pack_window *windows; @@ -85,7 +85,7 @@ struct packfile_store { * A map of packfile names to packed_git structs for tracking which * packs have been loaded already. */ - struct hashmap map; + struct strmap packs_by_path; /* * Whether packfiles have already been populated with this store's From f905a855b1d1e172ba1e51e03fc5ec531445575e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 30 Oct 2025 11:38:39 +0100 Subject: [PATCH 004/553] packfile: move the MRU list into the packfile store Packfiles have two lists associated to them: - A list that keeps track of packfiles in the order that they were added to a packfile store. - A list that keeps track of packfiles in most-recently-used order so that packfiles that are more likely to contain a specific object are ordered towards the front. Both of these lists are hosted by `struct packed_git` itself, So to identify all packfiles in a repository you simply need to grab the first packfile and then iterate the `->next` pointers or the MRU list. This pattern has the problem that all packfiles are part of the same list, regardless of whether or not they belong to the same object source. With the upcoming pluggable object database effort this needs to change: packfiles should be contained by a single object source, and reading an object from any such packfile should use that source to look up the object. Consequently, we need to break up the global lists of packfiles into per-object-source lists. A first step towards this goal is to move those lists out of `struct packed_git` and into the packfile store. While the packfile store is currently sitting on the `struct object_database` level, the intent is to push it down one level into the `struct odb_source` in a subsequent patch series. Introduce a new `struct packfile_list` that is used to manage lists of packfiles and use it to store the list of most-recently-used packfiles in `struct packfile_store`. For now, the new list type is only used in a single spot, but we'll expand its usage in subsequent patches. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 9 ++--- midx.c | 2 +- packfile.c | 92 +++++++++++++++++++++++++++++++++++++----- packfile.h | 19 +++++++-- 4 files changed, 104 insertions(+), 18 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index b5454e5df137b4..5348aebbe9f190 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1706,8 +1706,8 @@ static int want_object_in_pack_mtime(const struct object_id *oid, uint32_t found_mtime) { int want; + struct packfile_list_entry *e; struct odb_source *source; - struct list_head *pos; if (!exclude && local) { /* @@ -1748,12 +1748,11 @@ static int want_object_in_pack_mtime(const struct object_id *oid, } } - list_for_each(pos, packfile_store_get_packs_mru(the_repository->objects->packfiles)) { - struct packed_git *p = list_entry(pos, struct packed_git, mru); + for (e = the_repository->objects->packfiles->mru.head; e; e = e->next) { + struct packed_git *p = e->pack; want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); if (!exclude && want > 0) - list_move(&p->mru, - packfile_store_get_packs_mru(the_repository->objects->packfiles)); + packfile_list_prepend(&the_repository->objects->packfiles->mru, p); if (want != -1) return want; } diff --git a/midx.c b/midx.c index 1d6269f957e781..8022be9a45ecb9 100644 --- a/midx.c +++ b/midx.c @@ -463,7 +463,7 @@ int prepare_midx_pack(struct multi_pack_index *m, p = packfile_store_load_pack(r->objects->packfiles, pack_name.buf, m->source->local); if (p) - list_add_tail(&p->mru, &r->objects->packfiles->mru); + packfile_list_append(&m->source->odb->packfiles->mru, p); strbuf_release(&pack_name); if (!p) { diff --git a/packfile.c b/packfile.c index 04649e52920a1f..4d2d3b674f3fb0 100644 --- a/packfile.c +++ b/packfile.c @@ -47,6 +47,80 @@ static size_t pack_mapped; #define SZ_FMT PRIuMAX static inline uintmax_t sz_fmt(size_t s) { return s; } +void packfile_list_clear(struct packfile_list *list) +{ + struct packfile_list_entry *e, *next; + + for (e = list->head; e; e = next) { + next = e->next; + free(e); + } + + list->head = list->tail = NULL; +} + +static struct packfile_list_entry *packfile_list_remove_internal(struct packfile_list *list, + struct packed_git *pack) +{ + struct packfile_list_entry *e, *prev; + + for (e = list->head, prev = NULL; e; prev = e, e = e->next) { + if (e->pack != pack) + continue; + + if (prev) + prev->next = e->next; + if (list->head == e) + list->head = e->next; + if (list->tail == e) + list->tail = prev; + + return e; + } + + return NULL; +} + +void packfile_list_remove(struct packfile_list *list, struct packed_git *pack) +{ + free(packfile_list_remove_internal(list, pack)); +} + +void packfile_list_prepend(struct packfile_list *list, struct packed_git *pack) +{ + struct packfile_list_entry *entry; + + entry = packfile_list_remove_internal(list, pack); + if (!entry) { + entry = xmalloc(sizeof(*entry)); + entry->pack = pack; + } + entry->next = list->head; + + list->head = entry; + if (!list->tail) + list->tail = entry; +} + +void packfile_list_append(struct packfile_list *list, struct packed_git *pack) +{ + struct packfile_list_entry *entry; + + entry = packfile_list_remove_internal(list, pack); + if (!entry) { + entry = xmalloc(sizeof(*entry)); + entry->pack = pack; + } + entry->next = NULL; + + if (list->tail) { + list->tail->next = entry; + list->tail = entry; + } else { + list->head = list->tail = entry; + } +} + void pack_report(struct repository *repo) { fprintf(stderr, @@ -995,10 +1069,10 @@ static void packfile_store_prepare_mru(struct packfile_store *store) { struct packed_git *p; - INIT_LIST_HEAD(&store->mru); + packfile_list_clear(&store->mru); for (p = store->packs; p; p = p->next) - list_add_tail(&p->mru, &store->mru); + packfile_list_append(&store->mru, p); } void packfile_store_prepare(struct packfile_store *store) @@ -1040,10 +1114,10 @@ struct packed_git *packfile_store_get_packs(struct packfile_store *store) return store->packs; } -struct list_head *packfile_store_get_packs_mru(struct packfile_store *store) +struct packfile_list_entry *packfile_store_get_packs_mru(struct packfile_store *store) { packfile_store_prepare(store); - return &store->mru; + return store->mru.head; } /* @@ -2048,7 +2122,7 @@ static int fill_pack_entry(const struct object_id *oid, int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e) { - struct list_head *pos; + struct packfile_list_entry *l; packfile_store_prepare(r->objects->packfiles); @@ -2059,10 +2133,11 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa if (!r->objects->packfiles->packs) return 0; - list_for_each(pos, &r->objects->packfiles->mru) { - struct packed_git *p = list_entry(pos, struct packed_git, mru); + for (l = r->objects->packfiles->mru.head; l; l = l->next) { + struct packed_git *p = l->pack; + if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { - list_move(&p->mru, &r->objects->packfiles->mru); + packfile_list_prepend(&r->objects->packfiles->mru, p); return 1; } } @@ -2314,7 +2389,6 @@ struct packfile_store *packfile_store_new(struct object_database *odb) struct packfile_store *store; CALLOC_ARRAY(store, 1); store->odb = odb; - INIT_LIST_HEAD(&store->mru); strmap_init(&store->packs_by_path); return store; } diff --git a/packfile.h b/packfile.h index 9da7f14317b02c..39ed1073e4ad79 100644 --- a/packfile.h +++ b/packfile.h @@ -12,7 +12,6 @@ struct object_info; struct packed_git { struct packed_git *next; - struct list_head mru; struct pack_window *windows; off_t pack_size; const void *index_data; @@ -52,6 +51,20 @@ struct packed_git { char pack_name[FLEX_ARRAY]; /* more */ }; +struct packfile_list { + struct packfile_list_entry *head, *tail; +}; + +struct packfile_list_entry { + struct packfile_list_entry *next; + struct packed_git *pack; +}; + +void packfile_list_clear(struct packfile_list *list); +void packfile_list_remove(struct packfile_list *list, struct packed_git *pack); +void packfile_list_prepend(struct packfile_list *list, struct packed_git *pack); +void packfile_list_append(struct packfile_list *list, struct packed_git *pack); + /* * A store that manages packfiles for a given object database. */ @@ -79,7 +92,7 @@ struct packfile_store { } kept_cache; /* A most-recently-used ordered version of the packs list. */ - struct list_head mru; + struct packfile_list mru; /* * A map of packfile names to packed_git structs for tracking which @@ -153,7 +166,7 @@ struct packed_git *packfile_store_get_packs(struct packfile_store *store); /* * Get all packs in most-recently-used order. */ -struct list_head *packfile_store_get_packs_mru(struct packfile_store *store); +struct packfile_list_entry *packfile_store_get_packs_mru(struct packfile_store *store); /* * Open the packfile and add it to the store if it isn't yet known. Returns From 89219bc0cd09ada8a204e0ace0bd15decaea7d31 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 30 Oct 2025 11:38:40 +0100 Subject: [PATCH 005/553] http: refactor subsystem to use `packfile_list`s The dumb HTTP protocol directly fetches packfiles from the remote server and temporarily stores them in a list of packfiles. Those packfiles are not yet added to the repository's packfile store until we finalize the whole fetch. Refactor the code to instead use a `struct packfile_list` to store those packs. This prepares us for a subsequent change where the `->next` pointer of `struct packed_git` will go away. Note that this refactoring creates some temporary duplication of code, as we now have both `packfile_list_find_oid()` and `find_oid_pack()`. The latter function will be removed in a subsequent commit though. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- http-push.c | 6 +++--- http-walker.c | 26 +++++++++----------------- http.c | 21 ++++++++------------- http.h | 5 +++-- packfile.c | 9 +++++++++ packfile.h | 8 ++++++++ 6 files changed, 40 insertions(+), 35 deletions(-) diff --git a/http-push.c b/http-push.c index a1c01e3b9b93a3..d86ce771198206 100644 --- a/http-push.c +++ b/http-push.c @@ -104,7 +104,7 @@ struct repo { int has_info_refs; int can_update_info_refs; int has_info_packs; - struct packed_git *packs; + struct packfile_list packs; struct remote_lock *locks; }; @@ -311,7 +311,7 @@ static void start_fetch_packed(struct transfer_request *request) struct transfer_request *check_request = request_queue_head; struct http_pack_request *preq; - target = find_oid_pack(&request->obj->oid, repo->packs); + target = packfile_list_find_oid(repo->packs.head, &request->obj->oid); if (!target) { fprintf(stderr, "Unable to fetch %s, will not be able to update server info refs\n", oid_to_hex(&request->obj->oid)); repo->can_update_info_refs = 0; @@ -683,7 +683,7 @@ static int add_send_request(struct object *obj, struct remote_lock *lock) get_remote_object_list(obj->oid.hash[0]); if (obj->flags & (REMOTE | PUSHING)) return 0; - target = find_oid_pack(&obj->oid, repo->packs); + target = packfile_list_find_oid(repo->packs.head, &obj->oid); if (target) { obj->flags |= REMOTE; return 0; diff --git a/http-walker.c b/http-walker.c index 0f7ae46d7f12c0..e886e6486646d1 100644 --- a/http-walker.c +++ b/http-walker.c @@ -15,7 +15,7 @@ struct alt_base { char *base; int got_indices; - struct packed_git *packs; + struct packfile_list packs; struct alt_base *next; }; @@ -324,11 +324,8 @@ static void process_alternates_response(void *callback_data) } else if (is_alternate_allowed(target.buf)) { warning("adding alternate object store: %s", target.buf); - newalt = xmalloc(sizeof(*newalt)); - newalt->next = NULL; + CALLOC_ARRAY(newalt, 1); newalt->base = strbuf_detach(&target, NULL); - newalt->got_indices = 0; - newalt->packs = NULL; while (tail->next != NULL) tail = tail->next; @@ -435,7 +432,7 @@ static int http_fetch_pack(struct walker *walker, struct alt_base *repo, if (fetch_indices(walker, repo)) return -1; - target = find_oid_pack(oid, repo->packs); + target = packfile_list_find_oid(repo->packs.head, oid); if (!target) return -1; close_pack_index(target); @@ -584,17 +581,15 @@ static void cleanup(struct walker *walker) if (data) { alt = data->alt; while (alt) { - struct packed_git *pack; + struct packfile_list_entry *e; alt_next = alt->next; - pack = alt->packs; - while (pack) { - struct packed_git *pack_next = pack->next; - close_pack(pack); - free(pack); - pack = pack_next; + for (e = alt->packs.head; e; e = e->next) { + close_pack(e->pack); + free(e->pack); } + packfile_list_clear(&alt->packs); free(alt->base); free(alt); @@ -612,14 +607,11 @@ struct walker *get_http_walker(const char *url) struct walker_data *data = xmalloc(sizeof(struct walker_data)); struct walker *walker = xmalloc(sizeof(struct walker)); - data->alt = xmalloc(sizeof(*data->alt)); + CALLOC_ARRAY(data->alt, 1); data->alt->base = xstrdup(url); for (s = data->alt->base + strlen(data->alt->base) - 1; *s == '/'; --s) *s = 0; - data->alt->got_indices = 0; - data->alt->packs = NULL; - data->alt->next = NULL; data->got_alternates = -1; walker->corrupt_object_found = 0; diff --git a/http.c b/http.c index 17130823f006f2..41f850db16d19f 100644 --- a/http.c +++ b/http.c @@ -2413,8 +2413,9 @@ static char *fetch_pack_index(unsigned char *hash, const char *base_url) return tmp; } -static int fetch_and_setup_pack_index(struct packed_git **packs_head, - unsigned char *sha1, const char *base_url) +static int fetch_and_setup_pack_index(struct packfile_list *packs, + unsigned char *sha1, + const char *base_url) { struct packed_git *new_pack, *p; char *tmp_idx = NULL; @@ -2448,12 +2449,11 @@ static int fetch_and_setup_pack_index(struct packed_git **packs_head, if (ret) return -1; - new_pack->next = *packs_head; - *packs_head = new_pack; + packfile_list_prepend(packs, new_pack); return 0; } -int http_get_info_packs(const char *base_url, struct packed_git **packs_head) +int http_get_info_packs(const char *base_url, struct packfile_list *packs) { struct http_get_options options = {0}; int ret = 0; @@ -2477,7 +2477,7 @@ int http_get_info_packs(const char *base_url, struct packed_git **packs_head) !parse_oid_hex(data, &oid, &data) && skip_prefix(data, ".pack", &data) && (*data == '\n' || *data == '\0')) { - fetch_and_setup_pack_index(packs_head, oid.hash, base_url); + fetch_and_setup_pack_index(packs, oid.hash, base_url); } else { data = strchrnul(data, '\n'); } @@ -2541,14 +2541,9 @@ int finish_http_pack_request(struct http_pack_request *preq) } void http_install_packfile(struct packed_git *p, - struct packed_git **list_to_remove_from) + struct packfile_list *list_to_remove_from) { - struct packed_git **lst = list_to_remove_from; - - while (*lst != p) - lst = &((*lst)->next); - *lst = (*lst)->next; - + packfile_list_remove(list_to_remove_from, p); packfile_store_add_pack(the_repository->objects->packfiles, p); } diff --git a/http.h b/http.h index 553e16205ce201..f9d459340476e4 100644 --- a/http.h +++ b/http.h @@ -2,6 +2,7 @@ #define HTTP_H struct packed_git; +struct packfile_list; #include "git-zlib.h" @@ -190,7 +191,7 @@ struct curl_slist *http_append_auth_header(const struct credential *c, /* Helpers for fetching packs */ int http_get_info_packs(const char *base_url, - struct packed_git **packs_head); + struct packfile_list *packs); /* Helper for getting Accept-Language header */ const char *http_get_accept_language_header(void); @@ -226,7 +227,7 @@ void release_http_pack_request(struct http_pack_request *preq); * from http_get_info_packs() and have chosen a specific pack to fetch. */ void http_install_packfile(struct packed_git *p, - struct packed_git **list_to_remove_from); + struct packfile_list *list_to_remove_from); /* Helpers for fetching object */ struct http_object_request { diff --git a/packfile.c b/packfile.c index 4d2d3b674f3fb0..6aa2ca8ac9ee43 100644 --- a/packfile.c +++ b/packfile.c @@ -121,6 +121,15 @@ void packfile_list_append(struct packfile_list *list, struct packed_git *pack) } } +struct packed_git *packfile_list_find_oid(struct packfile_list_entry *packs, + const struct object_id *oid) +{ + for (; packs; packs = packs->next) + if (find_pack_entry_one(oid, packs->pack)) + return packs->pack; + return NULL; +} + void pack_report(struct repository *repo) { fprintf(stderr, diff --git a/packfile.h b/packfile.h index 39ed1073e4ad79..a53336d722a42b 100644 --- a/packfile.h +++ b/packfile.h @@ -65,6 +65,14 @@ void packfile_list_remove(struct packfile_list *list, struct packed_git *pack); void packfile_list_prepend(struct packfile_list *list, struct packed_git *pack); void packfile_list_append(struct packfile_list *list, struct packed_git *pack); +/* + * Find the pack within the "packs" list whose index contains the object + * "oid". For general object lookups, you probably don't want this; use + * find_pack_entry() instead. + */ +struct packed_git *packfile_list_find_oid(struct packfile_list_entry *packs, + const struct object_id *oid); + /* * A store that manages packfiles for a given object database. */ From 02a7f6ffab9ec7641f88032f30998976bca07820 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 30 Oct 2025 11:38:41 +0100 Subject: [PATCH 006/553] packfile: fix approximation of object counts When approximating the number of objects in a repository we only take into account two data sources, the multi-pack index and the packfile indices, as both of these data structures allow us to easily figure out how many objects they contain. But the way we currently approximate the number of objects is broken in presence of a multi-pack index. This is due to two separate reasons: - We have recently introduced initial infrastructure for incremental multi-pack indices. Starting with that series, `num_objects` only counts the number of objects of a specific layer of the MIDX chain, so we do not take into account objects from parent layers. This issue is fixed by adding `num_objects_in_base`, which contains the sum of all objects in previous layers. - When using the multi-pack index we may count objects contained in packfiles twice: once via the multi-pack index, but then we again count them via the packfile itself. This issue is fixed by skipping any packfiles that have an MIDX. Overall, given that we _always_ count the packs, we can only end up overestimating the number of objects, and the overestimation is limited to a factor of two at most. The consequences of those issues are very limited though, as we only approximate object counts in a small number of cases: - When writing a commit-graph we use the approximate object count to display the upper limit of a progress display. - In `repo_find_unique_abbrev_r()` we use it to specify a lower limit of how many hex digits we want to abbreviate to. Given that we use power-of-two here to derive the lower limit we may end up with an abbreviated hash that is one digit longer than required. - In `estimate_repack_memory()` we may end up overestimating how much memory a repack needs to pack objects. Conseuqently, we may end up dropping some packfiles from a repack. None of these are really game-changing. But it's nice to fix those issues regardless. While at it, convert the code to use `repo_for_each_pack()`. Furthermore, use `odb_prepare_alternates()` instead of explicitly preparing the packfile store. We really only want to prepare the object database sources, and `get_multi_pack_index()` already knows to prepare the packfile store for us. Helped-by: Taylor Blau Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packfile.c b/packfile.c index 6aa2ca8ac9ee43..b07509b69bd7cc 100644 --- a/packfile.c +++ b/packfile.c @@ -1143,16 +1143,16 @@ unsigned long repo_approximate_object_count(struct repository *r) unsigned long count = 0; struct packed_git *p; - packfile_store_prepare(r->objects->packfiles); + odb_prepare_alternates(r->objects); for (source = r->objects->sources; source; source = source->next) { struct multi_pack_index *m = get_multi_pack_index(source); if (m) - count += m->num_objects; + count += m->num_objects + m->num_objects_in_base; } - for (p = r->objects->packfiles->packs; p; p = p->next) { - if (open_pack_index(p)) + repo_for_each_pack(r, p) { + if (p->multi_pack_index || open_pack_index(p)) continue; count += p->num_objects; } From 0d0e4b5954e97fcdfd0a4120e17e2c570497981a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 30 Oct 2025 11:38:42 +0100 Subject: [PATCH 007/553] builtin/pack-objects: simplify logic to find kept or nonlocal objects The function `has_sha1_pack_kept_or_nonlocal()` takes an object ID and then searches through packed objects to figure out whether the object exists in a kept or non-local pack. As a performance optimization we remember the packfile that contains a given object ID so that the next call to the function first checks that same packfile again. The way this is written is rather hard to follow though, as the caching mechanism is intertwined with the loop that iterates through the packs. Consequently, we need to do some gymnastics to re-start the iteration if the cached pack does not contain the objects. Refactor this so that we check the cached packfile at the beginning. We don't have to re-verify whether the packfile meets the properties as we have already verified those when storing the pack in `last_found` in the first place. So all we need to do is to use `find_pack_entry_one()` to check whether the pack contains the object ID, and to skip the cached pack in the loop so that we don't search it twice. Furthermore, stop using the `(void *)1` sentinel value and instead use a simple `NULL` pointer to indicate that we don't have a last-found pack yet. This refactoring significantly simplifies the logic and makes it much easier to follow. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 5348aebbe9f190..b83eb8ead14139 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -4388,27 +4388,27 @@ static void add_unreachable_loose_objects(struct rev_info *revs) static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) { - struct packfile_store *packs = the_repository->objects->packfiles; - static struct packed_git *last_found = (void *)1; + static struct packed_git *last_found = NULL; struct packed_git *p; - p = (last_found != (void *)1) ? last_found : - packfile_store_get_packs(packs); + if (last_found && find_pack_entry_one(oid, last_found)) + return 1; - while (p) { - if ((!p->pack_local || p->pack_keep || - p->pack_keep_in_core) && - find_pack_entry_one(oid, p)) { + repo_for_each_pack(the_repository, p) { + /* + * We have already checked `last_found`, so there is no need to + * re-check here. + */ + if (p == last_found) + continue; + + if ((!p->pack_local || p->pack_keep || p->pack_keep_in_core) && + find_pack_entry_one(oid, p)) { last_found = p; return 1; } - if (p == last_found) - p = packfile_store_get_packs(packs); - else - p = p->next; - if (p == last_found) - p = p->next; } + return 0; } From 589127caa73090040200989ff4d24c3d54f473f2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 30 Oct 2025 11:38:43 +0100 Subject: [PATCH 008/553] packfile: move list of packs into the packfile store Move the list of packs into the packfile store. This follows the same logic as in a previous commit, where we moved the most-recently-used list of packs, as well. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fast-import.c | 4 +-- packfile.c | 83 +++++++++++++++++++------------------------ packfile.h | 16 +++------ 3 files changed, 43 insertions(+), 60 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 215295c1561f57..6fe6e9bc61d81b 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -978,7 +978,7 @@ static int store_object( if (e->idx.offset) { duplicate_count_by_type[type]++; return 1; - } else if (find_oid_pack(&oid, packfile_store_get_packs(packs))) { + } else if (packfile_list_find_oid(packfile_store_get_packs(packs), &oid)) { e->type = type; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ @@ -1179,7 +1179,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) duplicate_count_by_type[OBJ_BLOB]++; truncate_pack(&checkpoint); - } else if (find_oid_pack(&oid, packfile_store_get_packs(packs))) { + } else if (packfile_list_find_oid(packfile_store_get_packs(packs), &oid)) { e->type = OBJ_BLOB; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ diff --git a/packfile.c b/packfile.c index b07509b69bd7cc..71e95ae11c56d2 100644 --- a/packfile.c +++ b/packfile.c @@ -356,13 +356,14 @@ static void scan_windows(struct packed_git *p, static int unuse_one_window(struct packed_git *current) { - struct packed_git *p, *lru_p = NULL; + struct packfile_list_entry *e; + struct packed_git *lru_p = NULL; struct pack_window *lru_w = NULL, *lru_l = NULL; if (current) scan_windows(current, &lru_p, &lru_w, &lru_l); - for (p = current->repo->objects->packfiles->packs; p; p = p->next) - scan_windows(p, &lru_p, &lru_w, &lru_l); + for (e = current->repo->objects->packfiles->packs.head; e; e = e->next) + scan_windows(e->pack, &lru_p, &lru_w, &lru_l); if (lru_p) { munmap(lru_w->base, lru_w->len); pack_mapped -= lru_w->len; @@ -542,14 +543,15 @@ static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struc static int close_one_pack(struct repository *r) { - struct packed_git *p, *lru_p = NULL; + struct packfile_list_entry *e; + struct packed_git *lru_p = NULL; struct pack_window *mru_w = NULL; int accept_windows_inuse = 1; - for (p = r->objects->packfiles->packs; p; p = p->next) { - if (p->pack_fd == -1) + for (e = r->objects->packfiles->packs.head; e; e = e->next) { + if (e->pack->pack_fd == -1) continue; - find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse); + find_lru_pack(e->pack, &lru_p, &mru_w, &accept_windows_inuse); } if (lru_p) @@ -868,8 +870,7 @@ void packfile_store_add_pack(struct packfile_store *store, if (pack->pack_fd != -1) pack_open_fds++; - pack->next = store->packs; - store->packs = pack; + packfile_list_prepend(&store->packs, pack); strmap_put(&store->packs_by_path, pack->pack_name, pack); } @@ -1046,9 +1047,10 @@ static void prepare_packed_git_one(struct odb_source *source) string_list_clear(data.garbage, 0); } -DEFINE_LIST_SORT(static, sort_packs, struct packed_git, next); +DEFINE_LIST_SORT(static, sort_packs, struct packfile_list_entry, next); -static int sort_pack(const struct packed_git *a, const struct packed_git *b) +static int sort_pack(const struct packfile_list_entry *a, + const struct packfile_list_entry *b) { int st; @@ -1058,7 +1060,7 @@ static int sort_pack(const struct packed_git *a, const struct packed_git *b) * remote ones could be on a network mounted filesystem. * Favor local ones for these reasons. */ - st = a->pack_local - b->pack_local; + st = a->pack->pack_local - b->pack->pack_local; if (st) return -st; @@ -1067,21 +1069,19 @@ static int sort_pack(const struct packed_git *a, const struct packed_git *b) * and more recent objects tend to get accessed more * often. */ - if (a->mtime < b->mtime) + if (a->pack->mtime < b->pack->mtime) return 1; - else if (a->mtime == b->mtime) + else if (a->pack->mtime == b->pack->mtime) return 0; return -1; } static void packfile_store_prepare_mru(struct packfile_store *store) { - struct packed_git *p; - packfile_list_clear(&store->mru); - for (p = store->packs; p; p = p->next) - packfile_list_append(&store->mru, p); + for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) + packfile_list_append(&store->mru, e->pack); } void packfile_store_prepare(struct packfile_store *store) @@ -1096,7 +1096,11 @@ void packfile_store_prepare(struct packfile_store *store) prepare_multi_pack_index_one(source); prepare_packed_git_one(source); } - sort_packs(&store->packs, sort_pack); + + sort_packs(&store->packs.head, sort_pack); + for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) + if (!e->next) + store->packs.tail = e; packfile_store_prepare_mru(store); store->initialized = true; @@ -1108,7 +1112,7 @@ void packfile_store_reprepare(struct packfile_store *store) packfile_store_prepare(store); } -struct packed_git *packfile_store_get_packs(struct packfile_store *store) +struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store) { packfile_store_prepare(store); @@ -1120,7 +1124,7 @@ struct packed_git *packfile_store_get_packs(struct packfile_store *store) prepare_midx_pack(m, i); } - return store->packs; + return store->packs.head; } struct packfile_list_entry *packfile_store_get_packs_mru(struct packfile_store *store) @@ -1276,11 +1280,11 @@ void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid) const struct packed_git *has_packed_and_bad(struct repository *r, const struct object_id *oid) { - struct packed_git *p; + struct packfile_list_entry *e; - for (p = r->objects->packfiles->packs; p; p = p->next) - if (oidset_contains(&p->bad_objects, oid)) - return p; + for (e = r->objects->packfiles->packs.head; e; e = e->next) + if (oidset_contains(&e->pack->bad_objects, oid)) + return e->pack; return NULL; } @@ -2088,19 +2092,6 @@ int is_pack_valid(struct packed_git *p) return !open_packed_git(p); } -struct packed_git *find_oid_pack(const struct object_id *oid, - struct packed_git *packs) -{ - struct packed_git *p; - - for (p = packs; p; p = p->next) { - if (find_pack_entry_one(oid, p)) - return p; - } - return NULL; - -} - static int fill_pack_entry(const struct object_id *oid, struct pack_entry *e, struct packed_git *p) @@ -2139,7 +2130,7 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa if (source->midx && fill_midx_entry(source->midx, oid, e)) return 1; - if (!r->objects->packfiles->packs) + if (!r->objects->packfiles->packs.head) return 0; for (l = r->objects->packfiles->mru.head; l; l = l->next) { @@ -2404,19 +2395,19 @@ struct packfile_store *packfile_store_new(struct object_database *odb) void packfile_store_free(struct packfile_store *store) { - for (struct packed_git *p = store->packs, *next; p; p = next) { - next = p->next; - free(p); - } + for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) + free(e->pack); + packfile_list_clear(&store->packs); + strmap_clear(&store->packs_by_path, 0); free(store); } void packfile_store_close(struct packfile_store *store) { - for (struct packed_git *p = store->packs; p; p = p->next) { - if (p->do_not_close) + for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) { + if (e->pack->do_not_close) BUG("want to close pack marked 'do-not-close'"); - close_pack(p); + close_pack(e->pack); } } diff --git a/packfile.h b/packfile.h index a53336d722a42b..d95275e666c95e 100644 --- a/packfile.h +++ b/packfile.h @@ -11,7 +11,6 @@ struct object_info; struct packed_git { - struct packed_git *next; struct pack_window *windows; off_t pack_size; const void *index_data; @@ -83,7 +82,7 @@ struct packfile_store { * The list of packfiles in the order in which they are being added to * the store. */ - struct packed_git *packs; + struct packfile_list packs; /* * Cache of packfiles which are marked as "kept", either because there @@ -163,13 +162,14 @@ void packfile_store_add_pack(struct packfile_store *store, * repository. */ #define repo_for_each_pack(repo, p) \ - for (p = packfile_store_get_packs(repo->objects->packfiles); p; p = p->next) + for (struct packfile_list_entry *e = packfile_store_get_packs(repo->objects->packfiles); \ + ((p) = (e ? e->pack : NULL)); e = e->next) /* * Get all packs managed by the given store, including packfiles that are * referenced by multi-pack indices. */ -struct packed_git *packfile_store_get_packs(struct packfile_store *store); +struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store); /* * Get all packs in most-recently-used order. @@ -266,14 +266,6 @@ extern void (*report_garbage)(unsigned seen_bits, const char *path); */ unsigned long repo_approximate_object_count(struct repository *r); -/* - * Find the pack within the "packs" list whose index contains the object "oid". - * For general object lookups, you probably don't want this; use - * find_pack_entry() instead. - */ -struct packed_git *find_oid_pack(const struct object_id *oid, - struct packed_git *packs); - void pack_report(struct repository *repo); /* From 6aff1f25a046f3dcd8a78b0c61414fa2d1c9a93c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 30 Oct 2025 11:38:44 +0100 Subject: [PATCH 009/553] packfile: always add packfiles to MRU when adding a pack When preparing the packfile store we know to also prepare the MRU list of packfiles with all packs that are currently loaded in the store via `packfile_store_prepare_mru()`. So we know that the list of packs in the MRU list should match the list of packs in the non-MRU list. But there are some direct or indirect callsites that add a packfile to the store via `packfile_store_add_pack()` without adding the pack to the MRU. And while functions that access the MRU (e.g. `find_pack_entry()`) know to call `packfile_store_prepare()`, which knows to prepare the MRU via `packfile_store_prepare_mru()`, that operation will be turned into a no-op because the packfile store is already prepared. So this will not cause us to add the packfile to the MRU, and consequently we won't be able to find the packfile in our MRU list. There are only a handful of callers outside of "packfile.c" that add a packfile to the store: - "builtin/fast-import.c" adds multiple packs of imported objects, but it knows to look up objects via `packfile_store_get_packs()`. This function does not use the MRU, so we're good. - "builtin/index-pack.c" adds the indexed pack to the store in case it needs to perform consistency checks on its objects. - "http.c" adds the fetched pack to the store so that we can access its objects. In all of these cases we actually want to access the contained objects. And luckily, reading these objects works as expected: 1. We eventually end up in `do_oid_object_info_extended()`. 2. Calling `find_pack_entry()` fails because the MRU list doesn't contain the newly added packfile. 3. The callers don't pass `OBJECT_INFO_QUICK`, so we end up repreparing the object database. This will also cause us to reprepare the MRU list. 4. We now retry reading the object via `find_pack_entry()`, and now we succeed because the MRU list got populated. This logic feels quite fragile: we intentionally add the packfile to the store, but we then ultimately rely on repreparing the entire store only to make the packfile accessible. While we do the correct thing in `do_oid_object_info_extended()`, other sites that access the MRU may not know to reprepare. But besides being fragile it's also a waste of resources: repreparing the object database requires us to re-read the alternates file and discard any caches. Refactor the code so that we unconditionally add packfiles to the MRU when adding them to a packfile store. This makes the logic less fragile and ensures that we don't have to reprepare the store to make the pack accessible. Note that this does not allow us to drop `packfile_store_prepare_mru()` just yet: while the MRU list is already populated with all packs now, the order in which we add these packs is indeterministic for most of the part. So by first calling `sort_pack()` on the other packfile list and then re-preparing the MRU list we inherit its sorting. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- midx.c | 2 -- packfile.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/midx.c b/midx.c index 8022be9a45ecb9..24e1e721754d0c 100644 --- a/midx.c +++ b/midx.c @@ -462,8 +462,6 @@ int prepare_midx_pack(struct multi_pack_index *m, m->pack_names[pack_int_id]); p = packfile_store_load_pack(r->objects->packfiles, pack_name.buf, m->source->local); - if (p) - packfile_list_append(&m->source->odb->packfiles->mru, p); strbuf_release(&pack_name); if (!p) { diff --git a/packfile.c b/packfile.c index 71e95ae11c56d2..60f2e42876a823 100644 --- a/packfile.c +++ b/packfile.c @@ -871,6 +871,7 @@ void packfile_store_add_pack(struct packfile_store *store, pack_open_fds++; packfile_list_prepend(&store->packs, pack); + packfile_list_append(&store->mru, pack); strmap_put(&store->packs_by_path, pack->pack_name, pack); } From c31bad4f7dcf3e04ae22e7d4a1059fd628acf1a2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 30 Oct 2025 11:38:45 +0100 Subject: [PATCH 010/553] packfile: track packs via the MRU list exclusively We track packfiles via two different lists: - `struct packfile_store::packs` is a list that sorts local packs first. In addition, these packs are sorted so that younger packs are sorted towards the front. - `struct packfile_store::mru` is a list that sorts packs so that most-recently used packs are at the front. The reasoning behind the ordering in the `packs` list is that younger objects stored in the local object store tend to be accessed more frequently, and that is certainly true for some cases. But there are going to be lots of cases where that isn't true. Especially when traversing history it is likely that one needs to access many older objects, and due to our housekeeping it is very likely that almost all of those older objects will be contained in one large pack that is oldest. So whether or not the ordering makes sense really depends on the use case at hand. A flexible approach like our MRU list addresses that need, as it will sort packs towards the front that are accessed all the time. Intuitively, this approach is thus able to satisfy more use cases more efficiently. This reasoning casts some doubt on whether or not it really makes sense to track packs via two different lists. It causes confusion, and it is not clear whether there are use cases where the `packs` list really is such an obvious choice. Merge these two lists into one most-recently-used list. Note that there is one important edge case: `for_each_packed_object()` uses the MRU list to iterate through packs, and then it lists each object in those packs. This would have the effect that we now sort the current pack towards the front, thus modifying the list of packfiles we are iterating over, with the consequence that we'll see an infinite loop. This edge case is worked around by introducing a new field that allows us to skip updating the MRU. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 4 ++-- packfile.c | 27 +++++++-------------------- packfile.h | 27 +++++++++++++++++---------- 3 files changed, 26 insertions(+), 32 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index b83eb8ead14139..0e4e9f80682fd6 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1748,11 +1748,11 @@ static int want_object_in_pack_mtime(const struct object_id *oid, } } - for (e = the_repository->objects->packfiles->mru.head; e; e = e->next) { + for (e = the_repository->objects->packfiles->packs.head; e; e = e->next) { struct packed_git *p = e->pack; want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); if (!exclude && want > 0) - packfile_list_prepend(&the_repository->objects->packfiles->mru, p); + packfile_list_prepend(&the_repository->objects->packfiles->packs, p); if (want != -1) return want; } diff --git a/packfile.c b/packfile.c index 60f2e42876a823..378b0b1920d493 100644 --- a/packfile.c +++ b/packfile.c @@ -870,9 +870,7 @@ void packfile_store_add_pack(struct packfile_store *store, if (pack->pack_fd != -1) pack_open_fds++; - packfile_list_prepend(&store->packs, pack); - packfile_list_append(&store->mru, pack); - + packfile_list_append(&store->packs, pack); strmap_put(&store->packs_by_path, pack->pack_name, pack); } @@ -1077,14 +1075,6 @@ static int sort_pack(const struct packfile_list_entry *a, return -1; } -static void packfile_store_prepare_mru(struct packfile_store *store) -{ - packfile_list_clear(&store->mru); - - for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) - packfile_list_append(&store->mru, e->pack); -} - void packfile_store_prepare(struct packfile_store *store) { struct odb_source *source; @@ -1103,7 +1093,6 @@ void packfile_store_prepare(struct packfile_store *store) if (!e->next) store->packs.tail = e; - packfile_store_prepare_mru(store); store->initialized = true; } @@ -1128,12 +1117,6 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor return store->packs.head; } -struct packfile_list_entry *packfile_store_get_packs_mru(struct packfile_store *store) -{ - packfile_store_prepare(store); - return store->mru.head; -} - /* * Give a fast, rough count of the number of objects in the repository. This * ignores loose objects completely. If you have a lot of them, then either @@ -2134,11 +2117,12 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa if (!r->objects->packfiles->packs.head) return 0; - for (l = r->objects->packfiles->mru.head; l; l = l->next) { + for (l = r->objects->packfiles->packs.head; l; l = l->next) { struct packed_git *p = l->pack; if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { - packfile_list_prepend(&r->objects->packfiles->mru, p); + if (!r->objects->packfiles->skip_mru_updates) + packfile_list_prepend(&r->objects->packfiles->packs, p); return 1; } } @@ -2270,6 +2254,7 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, int r = 0; int pack_errors = 0; + repo->objects->packfiles->skip_mru_updates = true; repo_for_each_pack(repo, p) { if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) continue; @@ -2290,6 +2275,8 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, if (r) break; } + repo->objects->packfiles->skip_mru_updates = false; + return r ? r : pack_errors; } diff --git a/packfile.h b/packfile.h index d95275e666c95e..27ba607e7c5eae 100644 --- a/packfile.h +++ b/packfile.h @@ -79,8 +79,8 @@ struct packfile_store { struct object_database *odb; /* - * The list of packfiles in the order in which they are being added to - * the store. + * The list of packfiles in the order in which they have been most + * recently used. */ struct packfile_list packs; @@ -98,9 +98,6 @@ struct packfile_store { unsigned flags; } kept_cache; - /* A most-recently-used ordered version of the packs list. */ - struct packfile_list mru; - /* * A map of packfile names to packed_git structs for tracking which * packs have been loaded already. @@ -112,6 +109,21 @@ struct packfile_store { * packs. */ bool initialized; + + /* + * Usually, packfiles will be reordered to the front of the `packs` + * list whenever an object is looked up via them. This has the effect + * that packs that contain a lot of accessed objects will be located + * towards the front. + * + * This is usually desireable, but there are exceptions. One exception + * is when the looking up multiple objects in a loop for each packfile. + * In that case, we may easily end up with an infinite loop as the + * packfiles get reordered to the front repeatedly. + * + * Setting this field to `true` thus disables these reorderings. + */ + bool skip_mru_updates; }; /* @@ -171,11 +183,6 @@ void packfile_store_add_pack(struct packfile_store *store, */ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store); -/* - * Get all packs in most-recently-used order. - */ -struct packfile_list_entry *packfile_store_get_packs_mru(struct packfile_store *store); - /* * Open the packfile and add it to the store if it isn't yet known. Returns * either the newly opened packfile or the preexisting packfile. Returns a From f82e430b4e46120e0ef67959e0ef9d8ab9282c56 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:41:56 +0100 Subject: [PATCH 011/553] odb: fix subtle logic to check whether an alternate is usable When adding an alternate to the object database we first check whether or not the path is usable. A path is usable if: - It actually exists. - We don't have it in our object sources yet. While the former check is trivial enough, the latter part is somewhat subtle and prone for bugs. This is because the function doesn't only check whether or not the given path is usable. But if it _is_ usable, we also store that path in the map of object sources immediately. The tricky part here is that the path that gets stored in the map is _not_ copied. Instead, we rely on the fact that subsequent code uses `strbuf_detach()` to store the exact same allocated memory in the created object source. Consequently, the memory is owned by the source but _also_ stored in the map. This subtlety is easy to miss, so if one decides to refactor this code one can easily end up breaking this mechanism. Make the relationship more explicit by not storing the path as part of `alt_odb_usable()`. Instead, store the path after we have created the source so that we can use the source's path pointer directly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/odb.c b/odb.c index 00a6e71568b598..57d85ed9505e3f 100644 --- a/odb.c +++ b/odb.c @@ -86,17 +86,16 @@ int odb_mkstemp(struct object_database *odb, /* * Return non-zero iff the path is usable as an alternate object database. */ -static int alt_odb_usable(struct object_database *o, - struct strbuf *path, - const char *normalized_objdir, khiter_t *pos) +static int alt_odb_usable(struct object_database *o, const char *path, + const char *normalized_objdir) { int r; /* Detect cases where alternate disappeared */ - if (!is_directory(path->buf)) { + if (!is_directory(path)) { error(_("object directory %s does not exist; " "check .git/objects/info/alternates"), - path->buf); + path); return 0; } @@ -113,11 +112,14 @@ static int alt_odb_usable(struct object_database *o, assert(r == 1); /* never used */ kh_value(o->source_by_path, p) = o->sources; } - if (fspatheq(path->buf, normalized_objdir)) + + if (fspatheq(path, normalized_objdir)) + return 0; + + if (kh_get_odb_path_map(o->source_by_path, path) < kh_end(o->source_by_path)) return 0; - *pos = kh_put_odb_path_map(o->source_by_path, path->buf, &r); - /* r: 0 = exists, 1 = never used, 2 = deleted */ - return r == 0 ? 0 : 1; + + return 1; } /* @@ -148,6 +150,7 @@ static struct odb_source *link_alt_odb_entry(struct object_database *odb, struct strbuf pathbuf = STRBUF_INIT; struct strbuf tmp = STRBUF_INIT; khiter_t pos; + int ret; if (!is_absolute_path(dir) && relative_base) { strbuf_realpath(&pathbuf, relative_base, 1); @@ -172,20 +175,21 @@ static struct odb_source *link_alt_odb_entry(struct object_database *odb, strbuf_reset(&tmp); strbuf_realpath(&tmp, odb->sources->path, 1); - if (!alt_odb_usable(odb, &pathbuf, tmp.buf, &pos)) + if (!alt_odb_usable(odb, pathbuf.buf, tmp.buf)) goto error; CALLOC_ARRAY(alternate, 1); alternate->odb = odb; alternate->local = false; - /* pathbuf.buf is already in r->objects->source_by_path */ alternate->path = strbuf_detach(&pathbuf, NULL); /* add the alternate entry */ *odb->sources_tail = alternate; odb->sources_tail = &(alternate->next); - alternate->next = NULL; - assert(odb->source_by_path); + + pos = kh_put_odb_path_map(odb->source_by_path, alternate->path, &ret); + if (!ret) + BUG("source must not yet exist"); kh_value(odb->source_by_path, pos) = alternate; /* recursively add alternates */ From 0820a4b120f310d87ac8817ade63896a901c9267 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:41:57 +0100 Subject: [PATCH 012/553] odb: introduce `odb_source_new()` We have three different locations where we create a new ODB source. Deduplicate the logic via a new `odb_source_new()` function. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 23 ++++++++++++++++------- odb.h | 4 ++++ repository.c | 14 +++++++++----- 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/odb.c b/odb.c index 57d85ed9505e3f..d2d4c514ae5c09 100644 --- a/odb.c +++ b/odb.c @@ -141,6 +141,20 @@ static void read_info_alternates(struct object_database *odb, const char *relative_base, int depth); +struct odb_source *odb_source_new(struct object_database *odb, + const char *path, + bool local) +{ + struct odb_source *source; + + CALLOC_ARRAY(source, 1); + source->odb = odb; + source->local = local; + source->path = xstrdup(path); + + return source; +} + static struct odb_source *link_alt_odb_entry(struct object_database *odb, const char *dir, const char *relative_base, @@ -178,10 +192,7 @@ static struct odb_source *link_alt_odb_entry(struct object_database *odb, if (!alt_odb_usable(odb, pathbuf.buf, tmp.buf)) goto error; - CALLOC_ARRAY(alternate, 1); - alternate->odb = odb; - alternate->local = false; - alternate->path = strbuf_detach(&pathbuf, NULL); + alternate = odb_source_new(odb, pathbuf.buf, false); /* add the alternate entry */ *odb->sources_tail = alternate; @@ -341,9 +352,7 @@ struct odb_source *odb_set_temporary_primary_source(struct object_database *odb, * Make a new primary odb and link the old primary ODB in as an * alternate */ - source = xcalloc(1, sizeof(*source)); - source->odb = odb; - source->path = xstrdup(dir); + source = odb_source_new(odb, dir, false); /* * Disable ref updates while a temporary odb is active, since diff --git a/odb.h b/odb.h index e6602dd90c833c..2bec895d1352f4 100644 --- a/odb.h +++ b/odb.h @@ -89,6 +89,10 @@ struct odb_source { char *path; }; +struct odb_source *odb_source_new(struct object_database *odb, + const char *path, + bool local); + struct packed_git; struct packfile_store; struct cached_object_entry; diff --git a/repository.c b/repository.c index 6faf5c73981ebf..6aaa7ba00869bf 100644 --- a/repository.c +++ b/repository.c @@ -160,20 +160,24 @@ void repo_set_gitdir(struct repository *repo, * until after xstrdup(root). Then we can free it. */ char *old_gitdir = repo->gitdir; + char *objects_path = NULL; repo->gitdir = xstrdup(gitfile ? gitfile : root); free(old_gitdir); repo_set_commondir(repo, o->commondir); + expand_base_dir(&objects_path, o->object_dir, + repo->commondir, "objects"); if (!repo->objects->sources) { - CALLOC_ARRAY(repo->objects->sources, 1); - repo->objects->sources->odb = repo->objects; - repo->objects->sources->local = true; + repo->objects->sources = odb_source_new(repo->objects, + objects_path, true); repo->objects->sources_tail = &repo->objects->sources->next; + free(objects_path); + } else { + free(repo->objects->sources->path); + repo->objects->sources->path = objects_path; } - expand_base_dir(&repo->objects->sources->path, o->object_dir, - repo->commondir, "objects"); repo->objects->sources->disable_ref_updates = o->disable_ref_updates; From c2da110411919387fef0f869181fb510d06295d8 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:41:58 +0100 Subject: [PATCH 013/553] odb: adjust naming to free object sources The functions `free_object_directory()` and `free_object_directories()` are responsible for freeing a single object source or all object sources connected to an object database, respectively. The associated structure has been renamed from `struct object_directory` to `struct odb_source` in a1e2581a1e (object-store: rename `object_directory` to `odb_source`, 2025-07-01) though, so the names are somewhat stale nowadays. Rename them to mention the new struct name instead. Furthermore, while at it, adapt them to our modern naming schema where we first have the subject followed by a verb. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/odb.c b/odb.c index d2d4c514ae5c09..77490d7fdbeb4b 100644 --- a/odb.c +++ b/odb.c @@ -365,7 +365,7 @@ struct odb_source *odb_set_temporary_primary_source(struct object_database *odb, return source->next; } -static void free_object_directory(struct odb_source *source) +static void odb_source_free(struct odb_source *source) { free(source->path); odb_clear_loose_cache(source); @@ -387,7 +387,7 @@ void odb_restore_primary_source(struct object_database *odb, BUG("we expect the old primary object store to be the first alternate"); odb->sources = restore_source; - free_object_directory(cur_source); + odb_source_free(cur_source); } char *compute_alternate_path(const char *path, struct strbuf *err) @@ -1015,13 +1015,13 @@ struct object_database *odb_new(struct repository *repo) return o; } -static void free_object_directories(struct object_database *o) +static void odb_free_sources(struct object_database *o) { while (o->sources) { struct odb_source *next; next = o->sources->next; - free_object_directory(o->sources); + odb_source_free(o->sources); o->sources = next; } kh_destroy_odb_path_map(o->source_by_path); @@ -1039,7 +1039,7 @@ void odb_clear(struct object_database *o) o->commit_graph = NULL; o->commit_graph_attempted = 0; - free_object_directories(o); + odb_free_sources(o); o->sources_tail = NULL; o->loaded_alternates = 0; From 0cc12dedef2885dba8cf2635697767d394baf91f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:41:59 +0100 Subject: [PATCH 014/553] object-file: move `fetch_if_missing` The `fetch_if_missing` global variable is declared in "object-file.h" but defined in "odb.c". The variable relates to the whole object database instead of only loose objects, so move the declaration into "odb.h" accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.h | 8 -------- odb.h | 8 ++++++++ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/object-file.h b/object-file.h index 3fd48dcafbf1dc..097e9764be169e 100644 --- a/object-file.h +++ b/object-file.h @@ -7,14 +7,6 @@ struct index_state; -/* - * Set this to 0 to prevent odb_read_object_info_extended() from fetching missing - * blobs. This has a difference only if extensions.partialClone is set. - * - * Its default value is 1. - */ -extern int fetch_if_missing; - enum { INDEX_WRITE_OBJECT = (1 << 0), INDEX_FORMAT_CHECK = (1 << 1), diff --git a/odb.h b/odb.h index 2bec895d1352f4..2346ffeca85961 100644 --- a/odb.h +++ b/odb.h @@ -14,6 +14,14 @@ struct strbuf; struct repository; struct multi_pack_index; +/* + * Set this to 0 to prevent odb_read_object_info_extended() from fetching missing + * blobs. This has a difference only if extensions.partialClone is set. + * + * Its default value is 1. + */ +extern int fetch_if_missing; + /* * Compute the exact path an alternate is at and returns it. In case of * error NULL is returned and the human readable error is added to `err` From ece43d9dc70b1717484ee78b66aef4f9390c2b2b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:00 +0100 Subject: [PATCH 015/553] object-file: introduce `struct odb_source_loose` Currently, all state that relates to loose objects is held directly by the `struct odb_source`. Introduce a new `struct odb_source_loose` to hold the state instead so that it is entirely self-contained. This structure will eventually morph into the backend for accessing loose objects. As such, this is part of the refactorings to introduce pluggable object databases. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 13 +++++++++++++ object-file.h | 7 +++++++ odb.c | 2 ++ odb.h | 3 +++ 4 files changed, 25 insertions(+) diff --git a/object-file.c b/object-file.c index 4675c8ed6b67eb..cd6aa561fa7db2 100644 --- a/object-file.c +++ b/object-file.c @@ -1995,3 +1995,16 @@ void object_file_transaction_commit(struct odb_transaction *transaction) transaction->odb->transaction = NULL; free(transaction); } + +struct odb_source_loose *odb_source_loose_new(struct odb_source *source) +{ + struct odb_source_loose *loose; + CALLOC_ARRAY(loose, 1); + loose->source = source; + return loose; +} + +void odb_source_loose_free(struct odb_source_loose *loose) +{ + free(loose); +} diff --git a/object-file.h b/object-file.h index 097e9764be169e..695a7e8e7c4b0b 100644 --- a/object-file.h +++ b/object-file.h @@ -18,6 +18,13 @@ int index_path(struct index_state *istate, struct object_id *oid, const char *pa struct odb_source; +struct odb_source_loose { + struct odb_source *source; +}; + +struct odb_source_loose *odb_source_loose_new(struct odb_source *source); +void odb_source_loose_free(struct odb_source_loose *loose); + /* * Populate and return the loose object cache array corresponding to the * given object ID. diff --git a/odb.c b/odb.c index 77490d7fdbeb4b..2d06ab0bb85c27 100644 --- a/odb.c +++ b/odb.c @@ -151,6 +151,7 @@ struct odb_source *odb_source_new(struct object_database *odb, source->odb = odb; source->local = local; source->path = xstrdup(path); + source->loose = odb_source_loose_new(source); return source; } @@ -368,6 +369,7 @@ struct odb_source *odb_set_temporary_primary_source(struct object_database *odb, static void odb_source_free(struct odb_source *source) { free(source->path); + odb_source_loose_free(source->loose); odb_clear_loose_cache(source); loose_object_map_clear(&source->loose_map); free(source); diff --git a/odb.h b/odb.h index 2346ffeca85961..49b398bedae640 100644 --- a/odb.h +++ b/odb.h @@ -48,6 +48,9 @@ struct odb_source { /* Object database that owns this object source. */ struct object_database *odb; + /* Private state for loose objects. */ + struct odb_source_loose *loose; + /* * Used to store the results of readdir(3) calls when we are OK * sacrificing accuracy due to races for speed. That includes From 90a93f9dea88532623ef7422dbc21d8dc70a58dd Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:01 +0100 Subject: [PATCH 016/553] object-file: move loose object cache into loose source Our loose objects use a cache that (optionally) stores all objects for each of the opened sharding directories. This cache is located in the `struct odb_source`, but now that we have `struct odb_source_loose` it makes sense to move it into the latter structure so that all state that relates to loose objects is entirely self-contained. Do so. While at it, rename corresponding functions to have a prefix that relates to `struct odb_source_loose`. Note that despite this prefix, the functions still accept a `struct odb_source` as input. This is done intentionally: once we introduce pluggable object databases, we will continue to accept this struct but then do a cast inside these functions to `struct odb_source_loose`. This design is similar to how we do it for our ref backends. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- loose.c | 9 +++++---- object-file.c | 35 +++++++++++++++++++---------------- object-file.h | 16 ++++++++++++++-- object-name.c | 2 +- odb.c | 1 - odb.h | 12 ------------ 6 files changed, 39 insertions(+), 36 deletions(-) diff --git a/loose.c b/loose.c index e8ea6e7e24ba31..8cc7573ff2b2d9 100644 --- a/loose.c +++ b/loose.c @@ -1,6 +1,7 @@ #include "git-compat-util.h" #include "hash.h" #include "path.h" +#include "object-file.h" #include "odb.h" #include "hex.h" #include "repository.h" @@ -54,7 +55,7 @@ static int insert_loose_map(struct odb_source *source, inserted |= insert_oid_pair(map->to_compat, oid, compat_oid); inserted |= insert_oid_pair(map->to_storage, compat_oid, oid); if (inserted) - oidtree_insert(source->loose_objects_cache, compat_oid); + oidtree_insert(source->loose->cache, compat_oid); return inserted; } @@ -66,9 +67,9 @@ static int load_one_loose_object_map(struct repository *repo, struct odb_source if (!source->loose_map) loose_object_map_init(&source->loose_map); - if (!source->loose_objects_cache) { - ALLOC_ARRAY(source->loose_objects_cache, 1); - oidtree_init(source->loose_objects_cache); + if (!source->loose->cache) { + ALLOC_ARRAY(source->loose->cache, 1); + oidtree_init(source->loose->cache); } insert_loose_map(source, repo->hash_algo->empty_tree, repo->compat_hash_algo->empty_tree); diff --git a/object-file.c b/object-file.c index cd6aa561fa7db2..fef00d6d3d082a 100644 --- a/object-file.c +++ b/object-file.c @@ -223,7 +223,7 @@ static int quick_has_loose(struct repository *r, odb_prepare_alternates(r->objects); for (source = r->objects->sources; source; source = source->next) { - if (oidtree_contains(odb_loose_cache(source, oid), oid)) + if (oidtree_contains(odb_source_loose_cache(source, oid), oid)) return 1; } return 0; @@ -1802,44 +1802,44 @@ static int append_loose_object(const struct object_id *oid, return 0; } -struct oidtree *odb_loose_cache(struct odb_source *source, - const struct object_id *oid) +struct oidtree *odb_source_loose_cache(struct odb_source *source, + const struct object_id *oid) { int subdir_nr = oid->hash[0]; struct strbuf buf = STRBUF_INIT; - size_t word_bits = bitsizeof(source->loose_objects_subdir_seen[0]); + size_t word_bits = bitsizeof(source->loose->subdir_seen[0]); size_t word_index = subdir_nr / word_bits; size_t mask = (size_t)1u << (subdir_nr % word_bits); uint32_t *bitmap; if (subdir_nr < 0 || - (size_t) subdir_nr >= bitsizeof(source->loose_objects_subdir_seen)) + (size_t) subdir_nr >= bitsizeof(source->loose->subdir_seen)) BUG("subdir_nr out of range"); - bitmap = &source->loose_objects_subdir_seen[word_index]; + bitmap = &source->loose->subdir_seen[word_index]; if (*bitmap & mask) - return source->loose_objects_cache; - if (!source->loose_objects_cache) { - ALLOC_ARRAY(source->loose_objects_cache, 1); - oidtree_init(source->loose_objects_cache); + return source->loose->cache; + if (!source->loose->cache) { + ALLOC_ARRAY(source->loose->cache, 1); + oidtree_init(source->loose->cache); } strbuf_addstr(&buf, source->path); for_each_file_in_obj_subdir(subdir_nr, &buf, source->odb->repo->hash_algo, append_loose_object, NULL, NULL, - source->loose_objects_cache); + source->loose->cache); *bitmap |= mask; strbuf_release(&buf); - return source->loose_objects_cache; + return source->loose->cache; } void odb_clear_loose_cache(struct odb_source *source) { - oidtree_clear(source->loose_objects_cache); - FREE_AND_NULL(source->loose_objects_cache); - memset(&source->loose_objects_subdir_seen, 0, - sizeof(source->loose_objects_subdir_seen)); + oidtree_clear(source->loose->cache); + FREE_AND_NULL(source->loose->cache); + memset(&source->loose->subdir_seen, 0, + sizeof(source->loose->subdir_seen)); } static int check_stream_oid(git_zstream *stream, @@ -2006,5 +2006,8 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source *source) void odb_source_loose_free(struct odb_source_loose *loose) { + if (!loose) + return; + odb_clear_loose_cache(loose->source); free(loose); } diff --git a/object-file.h b/object-file.h index 695a7e8e7c4b0b..90da69cf5f7d59 100644 --- a/object-file.h +++ b/object-file.h @@ -20,6 +20,18 @@ struct odb_source; struct odb_source_loose { struct odb_source *source; + + /* + * Used to store the results of readdir(3) calls when we are OK + * sacrificing accuracy due to races for speed. That includes + * object existence with OBJECT_INFO_QUICK, as well as + * our search for unique abbreviated hashes. Don't use it for tasks + * requiring greater accuracy! + * + * Be sure to call odb_load_loose_cache() before using. + */ + uint32_t subdir_seen[8]; /* 256 bits */ + struct oidtree *cache; }; struct odb_source_loose *odb_source_loose_new(struct odb_source *source); @@ -29,8 +41,8 @@ void odb_source_loose_free(struct odb_source_loose *loose); * Populate and return the loose object cache array corresponding to the * given object ID. */ -struct oidtree *odb_loose_cache(struct odb_source *source, - const struct object_id *oid); +struct oidtree *odb_source_loose_cache(struct odb_source *source, + const struct object_id *oid); /* Empty the loose object cache for the specified object directory. */ void odb_clear_loose_cache(struct odb_source *source); diff --git a/object-name.c b/object-name.c index 766c757042a389..8ce0ef7c23a6bd 100644 --- a/object-name.c +++ b/object-name.c @@ -116,7 +116,7 @@ static void find_short_object_filename(struct disambiguate_state *ds) struct odb_source *source; for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) - oidtree_each(odb_loose_cache(source, &ds->bin_pfx), + oidtree_each(odb_source_loose_cache(source, &ds->bin_pfx), &ds->bin_pfx, ds->len, match_prefix, ds); } diff --git a/odb.c b/odb.c index 2d06ab0bb85c27..87d84688c638cc 100644 --- a/odb.c +++ b/odb.c @@ -370,7 +370,6 @@ static void odb_source_free(struct odb_source *source) { free(source->path); odb_source_loose_free(source->loose); - odb_clear_loose_cache(source); loose_object_map_clear(&source->loose_map); free(source); } diff --git a/odb.h b/odb.h index 49b398bedae640..77104396afe4aa 100644 --- a/odb.h +++ b/odb.h @@ -51,18 +51,6 @@ struct odb_source { /* Private state for loose objects. */ struct odb_source_loose *loose; - /* - * Used to store the results of readdir(3) calls when we are OK - * sacrificing accuracy due to races for speed. That includes - * object existence with OBJECT_INFO_QUICK, as well as - * our search for unique abbreviated hashes. Don't use it for tasks - * requiring greater accuracy! - * - * Be sure to call odb_load_loose_cache() before using. - */ - uint32_t loose_objects_subdir_seen[8]; /* 256 bits */ - struct oidtree *loose_objects_cache; - /* Map between object IDs for loose objects. */ struct loose_object_map *loose_map; From be659c97eae3b68e38b71f0a67067dede23903b5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:02 +0100 Subject: [PATCH 017/553] object-file: hide internals when we need to reprepare loose sources There are two different situations where we have to clear the cache of loose objects: - When freeing the loose object source itself to avoid memory leaks. - When repreparing the loose object source so that any potentially- stale data is getting evicted from the cache. The former is already handled by `odb_source_loose_free()`. But the latter case is still done manually by in `odb_reprepare()`, so we are leaking internals into that code. Introduce a new `odb_source_loose_reprepare()` function as an equivalent to `packfile_store_prepare()` to hide these implementation details. Furthermore, while at it, rename the function `odb_clear_loose_cache()` to `odb_source_loose_clear()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 17 +++++++++++------ object-file.h | 6 +++--- odb.c | 2 +- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/object-file.c b/object-file.c index fef00d6d3d082a..20daa629a1dda9 100644 --- a/object-file.c +++ b/object-file.c @@ -1834,12 +1834,17 @@ struct oidtree *odb_source_loose_cache(struct odb_source *source, return source->loose->cache; } -void odb_clear_loose_cache(struct odb_source *source) +static void odb_source_loose_clear_cache(struct odb_source_loose *loose) { - oidtree_clear(source->loose->cache); - FREE_AND_NULL(source->loose->cache); - memset(&source->loose->subdir_seen, 0, - sizeof(source->loose->subdir_seen)); + oidtree_clear(loose->cache); + FREE_AND_NULL(loose->cache); + memset(&loose->subdir_seen, 0, + sizeof(loose->subdir_seen)); +} + +void odb_source_loose_reprepare(struct odb_source *source) +{ + odb_source_loose_clear_cache(source->loose); } static int check_stream_oid(git_zstream *stream, @@ -2008,6 +2013,6 @@ void odb_source_loose_free(struct odb_source_loose *loose) { if (!loose) return; - odb_clear_loose_cache(loose->source); + odb_source_loose_clear_cache(loose); free(loose); } diff --git a/object-file.h b/object-file.h index 90da69cf5f7d59..bec855e8e53f95 100644 --- a/object-file.h +++ b/object-file.h @@ -37,6 +37,9 @@ struct odb_source_loose { struct odb_source_loose *odb_source_loose_new(struct odb_source *source); void odb_source_loose_free(struct odb_source_loose *loose); +/* Reprepare the loose source by emptying the loose object cache. */ +void odb_source_loose_reprepare(struct odb_source *source); + /* * Populate and return the loose object cache array corresponding to the * given object ID. @@ -44,9 +47,6 @@ void odb_source_loose_free(struct odb_source_loose *loose); struct oidtree *odb_source_loose_cache(struct odb_source *source, const struct object_id *oid); -/* Empty the loose object cache for the specified object directory. */ -void odb_clear_loose_cache(struct odb_source *source); - /* * Put in `buf` the name of the file in the local object database that * would be used to store a loose object with the specified oid. diff --git a/odb.c b/odb.c index 87d84688c638cc..b3e8d4a49cb07e 100644 --- a/odb.c +++ b/odb.c @@ -1071,7 +1071,7 @@ void odb_reprepare(struct object_database *o) odb_prepare_alternates(o); for (source = o->sources; source; source = source->next) - odb_clear_loose_cache(source); + odb_source_loose_reprepare(source); o->approximate_object_count_valid = 0; From 376016ec71c3a6c883f2ca77a3f1c0245fd60dc2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:03 +0100 Subject: [PATCH 018/553] object-file: move loose object map into loose source The loose object map is used to map from the repository's canonical object hash to the compatibility hash. As the name indicates, this map is only used for loose objects, and as such it is tied to a specific loose object source. Same as with preceding commits, move this map into the loose object source accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- loose.c | 10 +++++----- object-file.c | 1 + object-file.h | 3 +++ odb.c | 1 - odb.h | 3 --- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/loose.c b/loose.c index 8cc7573ff2b2d9..56cf64b648bf80 100644 --- a/loose.c +++ b/loose.c @@ -49,7 +49,7 @@ static int insert_loose_map(struct odb_source *source, const struct object_id *oid, const struct object_id *compat_oid) { - struct loose_object_map *map = source->loose_map; + struct loose_object_map *map = source->loose->map; int inserted = 0; inserted |= insert_oid_pair(map->to_compat, oid, compat_oid); @@ -65,8 +65,8 @@ static int load_one_loose_object_map(struct repository *repo, struct odb_source struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT; FILE *fp; - if (!source->loose_map) - loose_object_map_init(&source->loose_map); + if (!source->loose->map) + loose_object_map_init(&source->loose->map); if (!source->loose->cache) { ALLOC_ARRAY(source->loose->cache, 1); oidtree_init(source->loose->cache); @@ -125,7 +125,7 @@ int repo_read_loose_object_map(struct repository *repo) int repo_write_loose_object_map(struct repository *repo) { - kh_oid_map_t *map = repo->objects->sources->loose_map->to_compat; + kh_oid_map_t *map = repo->objects->sources->loose->map->to_compat; struct lock_file lock; int fd; khiter_t iter; @@ -231,7 +231,7 @@ int repo_loose_object_map_oid(struct repository *repo, khiter_t pos; for (source = repo->objects->sources; source; source = source->next) { - struct loose_object_map *loose_map = source->loose_map; + struct loose_object_map *loose_map = source->loose->map; if (!loose_map) continue; map = (to == repo->compat_hash_algo) ? diff --git a/object-file.c b/object-file.c index 20daa629a1dda9..ccc67713fad38f 100644 --- a/object-file.c +++ b/object-file.c @@ -2014,5 +2014,6 @@ void odb_source_loose_free(struct odb_source_loose *loose) if (!loose) return; odb_source_loose_clear_cache(loose); + loose_object_map_clear(&loose->map); free(loose); } diff --git a/object-file.h b/object-file.h index bec855e8e53f95..f8a96a45f57703 100644 --- a/object-file.h +++ b/object-file.h @@ -32,6 +32,9 @@ struct odb_source_loose { */ uint32_t subdir_seen[8]; /* 256 bits */ struct oidtree *cache; + + /* Map between object IDs for loose objects. */ + struct loose_object_map *map; }; struct odb_source_loose *odb_source_loose_new(struct odb_source *source); diff --git a/odb.c b/odb.c index b3e8d4a49cb07e..d1df9609e21d1e 100644 --- a/odb.c +++ b/odb.c @@ -370,7 +370,6 @@ static void odb_source_free(struct odb_source *source) { free(source->path); odb_source_loose_free(source->loose); - loose_object_map_clear(&source->loose_map); free(source); } diff --git a/odb.h b/odb.h index 77104396afe4aa..f9a3137a34aa3b 100644 --- a/odb.h +++ b/odb.h @@ -51,9 +51,6 @@ struct odb_source { /* Private state for loose objects. */ struct odb_source_loose *loose; - /* Map between object IDs for loose objects. */ - struct loose_object_map *loose_map; - /* * private data * From ff7ad5cb3936514ec0be32531ff6274b53dbe091 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:04 +0100 Subject: [PATCH 019/553] object-file: read objects via the loose object source When reading an object via `loose_object_info()` or `map_loose_object()` we hand in the whole repository. We then iterate through each of the object sources to figure out whether that source has the object in question. This logic is reversing responsibility though: a specific backend should only care about one specific source, where the object sources themselves are then managed by the object database. Refactor the code accordingly by passing an object source to both of these functions instead. The different sources are then handled by either `do_oid_object_info_extended()`, which sits on the object database level, and by `open_istream_loose()`. The latter function arguably is still at the wrong level, but this will be cleaned up at a later point in time. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 68 +++++++++++++++++++-------------------------------- object-file.h | 15 ++++++------ odb.c | 9 +++++-- streaming.c | 11 ++++++++- 4 files changed, 50 insertions(+), 53 deletions(-) diff --git a/object-file.c b/object-file.c index ccc67713fad38f..6d6e9a5a2ad3c8 100644 --- a/object-file.c +++ b/object-file.c @@ -167,25 +167,22 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) } /* - * Find "oid" as a loose object in the local repository or in an alternate. + * Find "oid" as a loose object in given source. * Returns 0 on success, negative on failure. * * The "path" out-parameter will give the path of the object we found (if any). * Note that it may point to static storage and is only valid until another * call to stat_loose_object(). */ -static int stat_loose_object(struct repository *r, const struct object_id *oid, +static int stat_loose_object(struct odb_source_loose *loose, + const struct object_id *oid, struct stat *st, const char **path) { - struct odb_source *source; static struct strbuf buf = STRBUF_INIT; - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { - *path = odb_loose_path(source, &buf, oid); - if (!lstat(*path, st)) - return 0; - } + *path = odb_loose_path(loose->source, &buf, oid); + if (!lstat(*path, st)) + return 0; return -1; } @@ -194,39 +191,24 @@ static int stat_loose_object(struct repository *r, const struct object_id *oid, * Like stat_loose_object(), but actually open the object and return the * descriptor. See the caveats on the "path" parameter above. */ -static int open_loose_object(struct repository *r, +static int open_loose_object(struct odb_source_loose *loose, const struct object_id *oid, const char **path) { - int fd; - struct odb_source *source; - int most_interesting_errno = ENOENT; static struct strbuf buf = STRBUF_INIT; + int fd; - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { - *path = odb_loose_path(source, &buf, oid); - fd = git_open(*path); - if (fd >= 0) - return fd; + *path = odb_loose_path(loose->source, &buf, oid); + fd = git_open(*path); + if (fd >= 0) + return fd; - if (most_interesting_errno == ENOENT) - most_interesting_errno = errno; - } - errno = most_interesting_errno; return -1; } -static int quick_has_loose(struct repository *r, +static int quick_has_loose(struct odb_source_loose *loose, const struct object_id *oid) { - struct odb_source *source; - - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { - if (oidtree_contains(odb_source_loose_cache(source, oid), oid)) - return 1; - } - return 0; + return !!oidtree_contains(odb_source_loose_cache(loose->source, oid), oid); } /* @@ -252,12 +234,12 @@ static void *map_fd(int fd, const char *path, unsigned long *size) return map; } -void *map_loose_object(struct repository *r, - const struct object_id *oid, - unsigned long *size) +void *odb_source_loose_map_object(struct odb_source *source, + const struct object_id *oid, + unsigned long *size) { const char *p; - int fd = open_loose_object(r, oid, &p); + int fd = open_loose_object(source->loose, oid, &p); if (fd < 0) return NULL; @@ -407,9 +389,9 @@ int parse_loose_header(const char *hdr, struct object_info *oi) return 0; } -int loose_object_info(struct repository *r, - const struct object_id *oid, - struct object_info *oi, int flags) +int odb_source_loose_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, int flags) { int status = 0; int fd; @@ -422,7 +404,7 @@ int loose_object_info(struct repository *r, enum object_type type_scratch; if (oi->delta_base_oid) - oidclr(oi->delta_base_oid, r->hash_algo); + oidclr(oi->delta_base_oid, source->odb->repo->hash_algo); /* * If we don't care about type or size, then we don't @@ -435,15 +417,15 @@ int loose_object_info(struct repository *r, if (!oi->typep && !oi->sizep && !oi->contentp) { struct stat st; if (!oi->disk_sizep && (flags & OBJECT_INFO_QUICK)) - return quick_has_loose(r, oid) ? 0 : -1; - if (stat_loose_object(r, oid, &st, &path) < 0) + return quick_has_loose(source->loose, oid) ? 0 : -1; + if (stat_loose_object(source->loose, oid, &st, &path) < 0) return -1; if (oi->disk_sizep) *oi->disk_sizep = st.st_size; return 0; } - fd = open_loose_object(r, oid, &path); + fd = open_loose_object(source->loose, oid, &path); if (fd < 0) { if (errno != ENOENT) error_errno(_("unable to open loose object %s"), oid_to_hex(oid)); diff --git a/object-file.h b/object-file.h index f8a96a45f57703..ca13d3d64e722b 100644 --- a/object-file.h +++ b/object-file.h @@ -43,6 +43,14 @@ void odb_source_loose_free(struct odb_source_loose *loose); /* Reprepare the loose source by emptying the loose object cache. */ void odb_source_loose_reprepare(struct odb_source *source); +int odb_source_loose_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, int flags); + +void *odb_source_loose_map_object(struct odb_source *source, + const struct object_id *oid, + unsigned long *size); + /* * Populate and return the loose object cache array corresponding to the * given object ID. @@ -66,9 +74,6 @@ const char *odb_loose_path(struct odb_source *source, int has_loose_object(struct odb_source *source, const struct object_id *oid); -void *map_loose_object(struct repository *r, const struct object_id *oid, - unsigned long *size); - /* * Iterate over the files in the loose-object parts of the object * directory "path", triggering the following callbacks: @@ -196,10 +201,6 @@ int check_object_signature(struct repository *r, const struct object_id *oid, */ int stream_object_signature(struct repository *r, const struct object_id *oid); -int loose_object_info(struct repository *r, - const struct object_id *oid, - struct object_info *oi, int flags); - enum finalize_object_file_flags { FOF_SKIP_COLLISION_CHECK = 1, }; diff --git a/odb.c b/odb.c index d1df9609e21d1e..4c0b4fdcd54ce1 100644 --- a/odb.c +++ b/odb.c @@ -697,13 +697,18 @@ static int do_oid_object_info_extended(struct object_database *odb, return 0; } + odb_prepare_alternates(odb); + while (1) { + struct odb_source *source; + if (find_pack_entry(odb->repo, real, &e)) break; /* Most likely it's a loose object. */ - if (!loose_object_info(odb->repo, real, oi, flags)) - return 0; + for (source = odb->sources; source; source = source->next) + if (!odb_source_loose_read_object_info(source, real, oi, flags)) + return 0; /* Not a loose object; someone else may have just packed it. */ if (!(flags & OBJECT_INFO_QUICK)) { diff --git a/streaming.c b/streaming.c index 4b13827668e67a..00ad649ae397f3 100644 --- a/streaming.c +++ b/streaming.c @@ -230,12 +230,21 @@ static int open_istream_loose(struct git_istream *st, struct repository *r, enum object_type *type) { struct object_info oi = OBJECT_INFO_INIT; + struct odb_source *source; + oi.sizep = &st->size; oi.typep = type; - st->u.loose.mapped = map_loose_object(r, oid, &st->u.loose.mapsize); + odb_prepare_alternates(r->objects); + for (source = r->objects->sources; source; source = source->next) { + st->u.loose.mapped = odb_source_loose_map_object(source, oid, + &st->u.loose.mapsize); + if (st->u.loose.mapped) + break; + } if (!st->u.loose.mapped) return -1; + switch (unpack_loose_header(&st->z, st->u.loose.mapped, st->u.loose.mapsize, st->u.loose.hdr, sizeof(st->u.loose.hdr))) { From 05130c6c9eed9ff7450e9067d7215032eb914c10 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:05 +0100 Subject: [PATCH 020/553] object-file: rename `has_loose_object()` Rename `has_loose_object()` to `odb_source_loose_has_object()` so that it becomes clear that this is tied to a specific loose object source. This matches our modern naming schema for functions. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 4 ++-- object-file.c | 6 +++--- object-file.h | 16 ++++++++-------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index b5454e5df137b4..69e80b1443a9b7 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1716,7 +1716,7 @@ static int want_object_in_pack_mtime(const struct object_id *oid, */ struct odb_source *source = the_repository->objects->sources->next; for (; source; source = source->next) - if (has_loose_object(source, oid)) + if (odb_source_loose_has_object(source, oid)) return 0; } @@ -3978,7 +3978,7 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type int found = 0; for (; !found && source; source = source->next) - if (has_loose_object(source, oid)) + if (odb_source_loose_has_object(source, oid)) found = 1; /* diff --git a/object-file.c b/object-file.c index 6d6e9a5a2ad3c8..79e7ab8d2e3d0e 100644 --- a/object-file.c +++ b/object-file.c @@ -99,8 +99,8 @@ static int check_and_freshen_source(struct odb_source *source, return check_and_freshen_file(path.buf, freshen); } -int has_loose_object(struct odb_source *source, - const struct object_id *oid) +int odb_source_loose_has_object(struct odb_source *source, + const struct object_id *oid) { return check_and_freshen_source(source, oid, 0); } @@ -1161,7 +1161,7 @@ int force_object_loose(struct odb_source *source, int ret; for (struct odb_source *s = source->odb->sources; s; s = s->next) - if (has_loose_object(s, oid)) + if (odb_source_loose_has_object(s, oid)) return 0; oi.typep = &type; diff --git a/object-file.h b/object-file.h index ca13d3d64e722b..065a44bb8a019e 100644 --- a/object-file.h +++ b/object-file.h @@ -51,6 +51,14 @@ void *odb_source_loose_map_object(struct odb_source *source, const struct object_id *oid, unsigned long *size); +/* + * Return true iff an object database source has a loose object + * with the specified name. This function does not respect replace + * references. + */ +int odb_source_loose_has_object(struct odb_source *source, + const struct object_id *oid); + /* * Populate and return the loose object cache array corresponding to the * given object ID. @@ -66,14 +74,6 @@ const char *odb_loose_path(struct odb_source *source, struct strbuf *buf, const struct object_id *oid); -/* - * Return true iff an object database source has a loose object - * with the specified name. This function does not respect replace - * references. - */ -int has_loose_object(struct odb_source *source, - const struct object_id *oid); - /* * Iterate over the files in the loose-object parts of the object * directory "path", triggering the following callbacks: From f2bd88a308a2754e727cb462e03102307cdfe004 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:06 +0100 Subject: [PATCH 021/553] object-file: refactor freshening of objects When writing an object that already exists in our object database we skip the write and instead only update mtimes of the object, either in its packed or loose object format. This logic is wholly contained in "object-file.c", but that file is really only concerned with loose objects. So it does not really make sense that it also contains the logic to freshen a packed object. Introduce a new `odb_freshen_object()` function that sits on the object database level and two functions `packfile_store_freshen_object()` and `odb_source_loose_freshen_object()`. Like this, the format-specific functions can be part of their respective subsystems, while the backend agnostic function to freshen an object sits at the object database layer. Note that this change also moves the logic that iterates through object sources from the object source layer into the object database layer. This change is intentional: object sources should ideally only have to worry about themselves, and coordination of different sources should be handled on the object database level. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 33 +++++---------------------------- object-file.h | 3 +++ odb.c | 16 ++++++++++++++++ odb.h | 3 +++ packfile.c | 16 ++++++++++++++++ packfile.h | 3 +++ 6 files changed, 46 insertions(+), 28 deletions(-) diff --git a/object-file.c b/object-file.c index 79e7ab8d2e3d0e..893c32adcddbbd 100644 --- a/object-file.c +++ b/object-file.c @@ -968,30 +968,10 @@ static int write_loose_object(struct odb_source *source, FOF_SKIP_COLLISION_CHECK); } -static int freshen_loose_object(struct object_database *odb, - const struct object_id *oid) +int odb_source_loose_freshen_object(struct odb_source *source, + const struct object_id *oid) { - odb_prepare_alternates(odb); - for (struct odb_source *source = odb->sources; source; source = source->next) - if (check_and_freshen_source(source, oid, 1)) - return 1; - return 0; -} - -static int freshen_packed_object(struct object_database *odb, - const struct object_id *oid) -{ - struct pack_entry e; - if (!find_pack_entry(odb->repo, oid, &e)) - return 0; - if (e.p->is_cruft) - return 0; - if (e.p->freshened) - return 1; - if (!freshen_file(e.p->pack_name)) - return 0; - e.p->freshened = 1; - return 1; + return !!check_and_freshen_source(source, oid, 1); } int stream_loose_object(struct odb_source *source, @@ -1073,12 +1053,10 @@ int stream_loose_object(struct odb_source *source, die(_("deflateEnd on stream object failed (%d)"), ret); close_loose_object(source, fd, tmp_file.buf); - if (freshen_packed_object(source->odb, oid) || - freshen_loose_object(source->odb, oid)) { + if (odb_freshen_object(source->odb, oid)) { unlink_or_warn(tmp_file.buf); goto cleanup; } - odb_loose_path(source, &filename, oid); /* We finally know the object path, and create the missing dir. */ @@ -1137,8 +1115,7 @@ int write_object_file(struct odb_source *source, * it out into .git/objects/??/?{38} file. */ write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen); - if (freshen_packed_object(source->odb, oid) || - freshen_loose_object(source->odb, oid)) + if (odb_freshen_object(source->odb, oid)) return 0; if (write_loose_object(source, oid, hdr, hdrlen, buf, len, 0, flags)) return -1; diff --git a/object-file.h b/object-file.h index 065a44bb8a019e..ee5b24cec66c34 100644 --- a/object-file.h +++ b/object-file.h @@ -59,6 +59,9 @@ void *odb_source_loose_map_object(struct odb_source *source, int odb_source_loose_has_object(struct odb_source *source, const struct object_id *oid); +int odb_source_loose_freshen_object(struct odb_source *source, + const struct object_id *oid); + /* * Populate and return the loose object cache array corresponding to the * given object ID. diff --git a/odb.c b/odb.c index 4c0b4fdcd54ce1..17734bdaffe8e6 100644 --- a/odb.c +++ b/odb.c @@ -987,6 +987,22 @@ int odb_has_object(struct object_database *odb, const struct object_id *oid, return odb_read_object_info_extended(odb, oid, NULL, object_info_flags) >= 0; } +int odb_freshen_object(struct object_database *odb, + const struct object_id *oid) +{ + struct odb_source *source; + + if (packfile_store_freshen_object(odb->packfiles, oid)) + return 1; + + odb_prepare_alternates(odb); + for (source = odb->sources; source; source = source->next) + if (odb_source_loose_freshen_object(source, oid)) + return 1; + + return 0; +} + void odb_assert_oid_type(struct object_database *odb, const struct object_id *oid, enum object_type expect) { diff --git a/odb.h b/odb.h index f9a3137a34aa3b..2653247e0cc871 100644 --- a/odb.h +++ b/odb.h @@ -396,6 +396,9 @@ int odb_has_object(struct object_database *odb, const struct object_id *oid, unsigned flags); +int odb_freshen_object(struct object_database *odb, + const struct object_id *oid); + void odb_assert_oid_type(struct object_database *odb, const struct object_id *oid, enum object_type expect); diff --git a/packfile.c b/packfile.c index 1ae2b2fe1eda77..40f733dd234900 100644 --- a/packfile.c +++ b/packfile.c @@ -819,6 +819,22 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store, return p; } +int packfile_store_freshen_object(struct packfile_store *store, + const struct object_id *oid) +{ + struct pack_entry e; + if (!find_pack_entry(store->odb->repo, oid, &e)) + return 0; + if (e.p->is_cruft) + return 0; + if (e.p->freshened) + return 1; + if (utime(e.p->pack_name, NULL)) + return 0; + e.p->freshened = 1; + return 1; +} + void (*report_garbage)(unsigned seen_bits, const char *path); static void report_helper(const struct string_list *list, diff --git a/packfile.h b/packfile.h index c9d0b93446b5f5..58fcc88e20224b 100644 --- a/packfile.h +++ b/packfile.h @@ -163,6 +163,9 @@ struct list_head *packfile_store_get_packs_mru(struct packfile_store *store); struct packed_git *packfile_store_load_pack(struct packfile_store *store, const char *idx_path, int local); +int packfile_store_freshen_object(struct packfile_store *store, + const struct object_id *oid); + struct pack_window { struct pack_window *next; unsigned char *base; From bfb1b2b4ac5cfa99f7d2503b404d282714d84bdf Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:07 +0100 Subject: [PATCH 022/553] object-file: rename `write_object_file()` Rename `write_object_file()` to `odb_source_loose_write_object()` so that it becomes clear that this is tied to a specific loose object source. This matches our modern naming schema for functions. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 8 ++++---- object-file.h | 10 +++++----- odb.c | 3 ++- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/object-file.c b/object-file.c index 893c32adcddbbd..fdc644a4275373 100644 --- a/object-file.c +++ b/object-file.c @@ -1084,10 +1084,10 @@ int stream_loose_object(struct odb_source *source, return err; } -int write_object_file(struct odb_source *source, - const void *buf, unsigned long len, - enum object_type type, struct object_id *oid, - struct object_id *compat_oid_in, unsigned flags) +int odb_source_loose_write_object(struct odb_source *source, + const void *buf, unsigned long len, + enum object_type type, struct object_id *oid, + struct object_id *compat_oid_in, unsigned flags) { const struct git_hash_algo *algo = source->odb->repo->hash_algo; const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; diff --git a/object-file.h b/object-file.h index ee5b24cec66c34..36a60e15c40547 100644 --- a/object-file.h +++ b/object-file.h @@ -62,6 +62,11 @@ int odb_source_loose_has_object(struct odb_source *source, int odb_source_loose_freshen_object(struct odb_source *source, const struct object_id *oid); +int odb_source_loose_write_object(struct odb_source *source, + const void *buf, unsigned long len, + enum object_type type, struct object_id *oid, + struct object_id *compat_oid_in, unsigned flags); + /* * Populate and return the loose object cache array corresponding to the * given object ID. @@ -168,11 +173,6 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, struct object_info; int parse_loose_header(const char *hdr, struct object_info *oi); -int write_object_file(struct odb_source *source, - const void *buf, unsigned long len, - enum object_type type, struct object_id *oid, - struct object_id *compat_oid_in, unsigned flags); - struct input_stream { const void *(*read)(struct input_stream *, unsigned long *len); void *data; diff --git a/odb.c b/odb.c index 17734bdaffe8e6..da44f1d63b43b2 100644 --- a/odb.c +++ b/odb.c @@ -1021,7 +1021,8 @@ int odb_write_object_ext(struct object_database *odb, struct object_id *compat_oid, unsigned flags) { - return write_object_file(odb->sources, buf, len, type, oid, compat_oid, flags); + return odb_source_loose_write_object(odb->sources, buf, len, type, + oid, compat_oid, flags); } struct object_database *odb_new(struct repository *repo) From 3e5e360888316ed1a44da69bf134bb6ec70aee1b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:08 +0100 Subject: [PATCH 023/553] object-file: refactor writing objects via a stream We have two different ways to write an object into the database: - We either provide the full buffer and write the object all at once. - Or we provide an input stream that has a `read()` function so that we can chunk the object. The latter is especially used for large objects, where it may be too expensive to hold the complete object in memory all at once. While we already have `odb_write_object()` at the ODB-layer, we don't have an equivalent for streaming an object. Introduce a new function `odb_write_object_stream()` to address this gap so that callers don't have to be aware of the inner workings of how to stream an object to disk with a specific object source. Rename `stream_loose_object()` to `odb_source_loose_write_stream()` to clarify its scope. This matches our modern best practices around how to name functions. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/unpack-objects.c | 7 +++---- object-file.c | 6 +++--- object-file.h | 14 ++++---------- odb.c | 7 +++++++ odb.h | 10 ++++++++++ 5 files changed, 27 insertions(+), 17 deletions(-) diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index ef79e43715d362..6fc64e9e4b8d5a 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -363,7 +363,7 @@ struct input_zstream_data { int status; }; -static const void *feed_input_zstream(struct input_stream *in_stream, +static const void *feed_input_zstream(struct odb_write_stream *in_stream, unsigned long *readlen) { struct input_zstream_data *data = in_stream->data; @@ -393,7 +393,7 @@ static void stream_blob(unsigned long size, unsigned nr) { git_zstream zstream = { 0 }; struct input_zstream_data data = { 0 }; - struct input_stream in_stream = { + struct odb_write_stream in_stream = { .read = feed_input_zstream, .data = &data, }; @@ -402,8 +402,7 @@ static void stream_blob(unsigned long size, unsigned nr) data.zstream = &zstream; git_inflate_init(&zstream); - if (stream_loose_object(the_repository->objects->sources, - &in_stream, size, &info->oid)) + if (odb_write_object_stream(the_repository->objects, &in_stream, size, &info->oid)) die(_("failed to write object in stream")); if (data.status != Z_STREAM_END) diff --git a/object-file.c b/object-file.c index fdc644a4275373..811c569ed36aa4 100644 --- a/object-file.c +++ b/object-file.c @@ -974,9 +974,9 @@ int odb_source_loose_freshen_object(struct odb_source *source, return !!check_and_freshen_source(source, oid, 1); } -int stream_loose_object(struct odb_source *source, - struct input_stream *in_stream, size_t len, - struct object_id *oid) +int odb_source_loose_write_stream(struct odb_source *source, + struct odb_write_stream *in_stream, size_t len, + struct object_id *oid) { const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; struct object_id compat_oid; diff --git a/object-file.h b/object-file.h index 36a60e15c40547..eeffa67bbda631 100644 --- a/object-file.h +++ b/object-file.h @@ -67,6 +67,10 @@ int odb_source_loose_write_object(struct odb_source *source, enum object_type type, struct object_id *oid, struct object_id *compat_oid_in, unsigned flags); +int odb_source_loose_write_stream(struct odb_source *source, + struct odb_write_stream *stream, size_t len, + struct object_id *oid); + /* * Populate and return the loose object cache array corresponding to the * given object ID. @@ -173,16 +177,6 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, struct object_info; int parse_loose_header(const char *hdr, struct object_info *oi); -struct input_stream { - const void *(*read)(struct input_stream *, unsigned long *len); - void *data; - int is_finished; -}; - -int stream_loose_object(struct odb_source *source, - struct input_stream *in_stream, size_t len, - struct object_id *oid); - int force_object_loose(struct odb_source *source, const struct object_id *oid, time_t mtime); diff --git a/odb.c b/odb.c index da44f1d63b43b2..3ec21ef24e16bb 100644 --- a/odb.c +++ b/odb.c @@ -1025,6 +1025,13 @@ int odb_write_object_ext(struct object_database *odb, oid, compat_oid, flags); } +int odb_write_object_stream(struct object_database *odb, + struct odb_write_stream *stream, size_t len, + struct object_id *oid) +{ + return odb_source_loose_write_stream(odb->sources, stream, len, oid); +} + struct object_database *odb_new(struct repository *repo) { struct object_database *o = xmalloc(sizeof(*o)); diff --git a/odb.h b/odb.h index 2653247e0cc871..9bb28008b1d953 100644 --- a/odb.h +++ b/odb.h @@ -492,4 +492,14 @@ static inline int odb_write_object(struct object_database *odb, return odb_write_object_ext(odb, buf, len, type, oid, NULL, 0); } +struct odb_write_stream { + const void *(*read)(struct odb_write_stream *, unsigned long *len); + void *data; + int is_finished; +}; + +int odb_write_object_stream(struct object_database *odb, + struct odb_write_stream *stream, size_t len, + struct object_id *oid); + #endif /* ODB_H */ From bdbebe5714b25dc9d215b48efbb80f410925d7dd Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:10 +0200 Subject: [PATCH 024/553] refs: introduce wrapper struct for `each_ref_fn` The `each_ref_fn` callback function type is used across our code base for several different functions that iterate through reference. There's a bunch of callbacks implementing this type, which makes any changes to the callback signature extremely noisy. An example of the required churn is e8207717f1 (refs: add referent to each_ref_fn, 2024-08-09): adding a single argument required us to change 48 files. It was already proposed back then [1] that we might want to introduce a wrapper structure to alleviate the pain going forward. While this of course requires the same kind of global refactoring as just introducing a new parameter, it at least allows us to more change the callback type afterwards by just extending the wrapper structure. One counterargument to this refactoring is that it makes the structure more opaque. While it is obvious which callsites need to be fixed up when we change the function type, it's not obvious anymore once we use a structure. That being said, we only have a handful of sites that actually need to populate this wrapper structure: our ref backends, "refs/iterator.c" as well as very few sites that invoke the iterator callback functions directly. Introduce this wrapper structure so that we can adapt the iterator interfaces more readily. [1]: Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- bisect.c | 24 ++++++------- builtin/bisect.c | 17 +++------- builtin/checkout.c | 6 ++-- builtin/describe.c | 18 +++++----- builtin/fetch.c | 13 +++---- builtin/fsck.c | 33 ++++++++++-------- builtin/gc.c | 15 ++++----- builtin/name-rev.c | 17 +++++----- builtin/pack-objects.c | 27 ++++++--------- builtin/receive-pack.c | 13 ++++--- builtin/remote.c | 44 +++++++++++------------- builtin/replace.c | 21 +++++------- builtin/repo.c | 9 ++--- builtin/rev-parse.c | 12 +++---- builtin/show-branch.c | 35 +++++++++---------- builtin/show-ref.c | 20 +++++------ builtin/submodule--helper.c | 10 ++---- builtin/worktree.c | 6 +--- commit-graph.c | 14 ++++---- delta-islands.c | 9 +++-- fetch-pack.c | 16 +++------ help.c | 10 +++--- http-backend.c | 20 +++++------ log-tree.c | 24 ++++++------- ls-refs.c | 36 ++++++++++++-------- midx-write.c | 17 +++++----- negotiator/default.c | 7 ++-- negotiator/skipping.c | 7 ++-- notes.c | 8 ++--- object-name.c | 10 +++--- pseudo-merge.c | 21 +++++------- reachable.c | 9 +++-- ref-filter.c | 24 +++++++------ reflog.c | 9 ++--- refs.c | 67 +++++++++++++++++++++---------------- refs.h | 26 +++++++++++--- refs/files-backend.c | 7 ++-- refs/iterator.c | 9 ++++- remote.c | 27 +++++++-------- repack-midx.c | 16 ++++----- replace-object.c | 16 ++++----- revision.c | 12 +++---- server-info.c | 12 +++---- shallow.c | 16 +++------ submodule.c | 12 ++----- t/helper/test-ref-store.c | 5 ++- upload-pack.c | 29 +++++++--------- walker.c | 8 ++--- worktree.c | 11 ++++-- 49 files changed, 392 insertions(+), 462 deletions(-) diff --git a/bisect.c b/bisect.c index a6dc76b15c910b..326b59c0dc70e7 100644 --- a/bisect.c +++ b/bisect.c @@ -450,21 +450,20 @@ void find_bisection(struct commit_list **commit_list, int *reaches, clear_commit_weight(&commit_weight); } -static int register_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flags UNUSED, void *cb_data UNUSED) +static int register_ref(const struct reference *ref, void *cb_data UNUSED) { struct strbuf good_prefix = STRBUF_INIT; strbuf_addstr(&good_prefix, term_good); strbuf_addstr(&good_prefix, "-"); - if (!strcmp(refname, term_bad)) { + if (!strcmp(ref->name, term_bad)) { free(current_bad_oid); current_bad_oid = xmalloc(sizeof(*current_bad_oid)); - oidcpy(current_bad_oid, oid); - } else if (starts_with(refname, good_prefix.buf)) { - oid_array_append(&good_revs, oid); - } else if (starts_with(refname, "skip-")) { - oid_array_append(&skipped_revs, oid); + oidcpy(current_bad_oid, ref->oid); + } else if (starts_with(ref->name, good_prefix.buf)) { + oid_array_append(&good_revs, ref->oid); + } else if (starts_with(ref->name, "skip-")) { + oid_array_append(&skipped_revs, ref->oid); } strbuf_release(&good_prefix); @@ -1178,14 +1177,11 @@ int estimate_bisect_steps(int all) return (e < 3 * x) ? n : n - 1; } -static int mark_for_removal(const char *refname, - const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flag UNUSED, void *cb_data) +static int mark_for_removal(const struct reference *ref, void *cb_data) { struct string_list *refs = cb_data; - char *ref = xstrfmt("refs/bisect%s", refname); - string_list_append(refs, ref); + char *bisect_ref = xstrfmt("refs/bisect%s", ref->name); + string_list_append(refs, bisect_ref); return 0; } diff --git a/builtin/bisect.c b/builtin/bisect.c index 8b8d870cd1ef08..5b2024be62dacd 100644 --- a/builtin/bisect.c +++ b/builtin/bisect.c @@ -358,10 +358,7 @@ static int check_and_set_terms(struct bisect_terms *terms, const char *cmd) return 0; } -static int inc_nr(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flag UNUSED, void *cb_data) +static int inc_nr(const struct reference *ref UNUSED, void *cb_data) { unsigned int *nr = (unsigned int *)cb_data; (*nr)++; @@ -549,12 +546,11 @@ static int bisect_append_log_quoted(const char **argv) return res; } -static int add_bisect_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flags UNUSED, void *cb) +static int add_bisect_ref(const struct reference *ref, void *cb) { struct add_bisect_ref_data *data = cb; - add_pending_oid(data->revs, refname, oid, data->object_flags); + add_pending_oid(data->revs, ref->name, ref->oid, data->object_flags); return 0; } @@ -1165,12 +1161,9 @@ static int bisect_visualize(struct bisect_terms *terms, int argc, return run_command(&cmd); } -static int get_first_good(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, void *cb_data) +static int get_first_good(const struct reference *ref, void *cb_data) { - oidcpy(cb_data, oid); + oidcpy(cb_data, ref->oid); return 1; } diff --git a/builtin/checkout.c b/builtin/checkout.c index f9453473fe2a20..66b69df6e67234 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -1063,11 +1063,9 @@ static void update_refs_for_switch(const struct checkout_opts *opts, report_tracking(new_branch_info); } -static int add_pending_uninteresting_ref(const char *refname, const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, void *cb_data) +static int add_pending_uninteresting_ref(const struct reference *ref, void *cb_data) { - add_pending_oid(cb_data, refname, oid, UNINTERESTING); + add_pending_oid(cb_data, ref->name, ref->oid, UNINTERESTING); return 0; } diff --git a/builtin/describe.c b/builtin/describe.c index ffaf8d9f0aa6ea..79545350443c6c 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -154,20 +154,19 @@ static void add_to_known_names(const char *path, } } -static int get_name(const char *path, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data UNUSED) +static int get_name(const struct reference *ref, void *cb_data UNUSED) { int is_tag = 0; struct object_id peeled; int is_annotated, prio; const char *path_to_match = NULL; - if (skip_prefix(path, "refs/tags/", &path_to_match)) { + if (skip_prefix(ref->name, "refs/tags/", &path_to_match)) { is_tag = 1; } else if (all) { if ((exclude_patterns.nr || patterns.nr) && - !skip_prefix(path, "refs/heads/", &path_to_match) && - !skip_prefix(path, "refs/remotes/", &path_to_match)) { + !skip_prefix(ref->name, "refs/heads/", &path_to_match) && + !skip_prefix(ref->name, "refs/remotes/", &path_to_match)) { /* Only accept reference of known type if there are match/exclude patterns */ return 0; } @@ -209,10 +208,10 @@ static int get_name(const char *path, const char *referent UNUSED, const struct } /* Is it annotated? */ - if (!peel_iterated_oid(the_repository, oid, &peeled)) { - is_annotated = !oideq(oid, &peeled); + if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) { + is_annotated = !oideq(ref->oid, &peeled); } else { - oidcpy(&peeled, oid); + oidcpy(&peeled, ref->oid); is_annotated = 0; } @@ -229,7 +228,8 @@ static int get_name(const char *path, const char *referent UNUSED, const struct else prio = 0; - add_to_known_names(all ? path + 5 : path + 10, &peeled, prio, oid); + add_to_known_names(all ? ref->name + 5 : ref->name + 10, + &peeled, prio, ref->oid); return 0; } diff --git a/builtin/fetch.c b/builtin/fetch.c index c7ff3480fb1827..7052e6ff215ead 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -289,13 +289,11 @@ static struct refname_hash_entry *refname_hash_add(struct hashmap *map, return ent; } -static int add_one_refname(const char *refname, const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, void *cbdata) +static int add_one_refname(const struct reference *ref, void *cbdata) { struct hashmap *refname_map = cbdata; - (void) refname_hash_add(refname_map, refname, oid); + (void) refname_hash_add(refname_map, ref->name, ref->oid); return 0; } @@ -1416,14 +1414,11 @@ static void set_option(struct transport *transport, const char *name, const char } -static int add_oid(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, void *cb_data) +static int add_oid(const struct reference *ref, void *cb_data) { struct oid_array *oids = cb_data; - oid_array_append(oids, oid); + oid_array_append(oids, ref->oid); return 0; } diff --git a/builtin/fsck.c b/builtin/fsck.c index 8ee95e0d67cf37..ed4eea16803349 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -530,14 +530,13 @@ static int fsck_handle_reflog(const char *logname, void *cb_data) return 0; } -static int fsck_handle_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data UNUSED) +static int fsck_handle_ref(const struct reference *ref, void *cb_data UNUSED) { struct object *obj; - obj = parse_object(the_repository, oid); + obj = parse_object(the_repository, ref->oid); if (!obj) { - if (is_promisor_object(the_repository, oid)) { + if (is_promisor_object(the_repository, ref->oid)) { /* * Increment default_refs anyway, because this is a * valid ref. @@ -546,19 +545,19 @@ static int fsck_handle_ref(const char *refname, const char *referent UNUSED, con return 0; } error(_("%s: invalid sha1 pointer %s"), - refname, oid_to_hex(oid)); + ref->name, oid_to_hex(ref->oid)); errors_found |= ERROR_REACHABLE; /* We'll continue with the rest despite the error.. */ return 0; } - if (obj->type != OBJ_COMMIT && is_branch(refname)) { - error(_("%s: not a commit"), refname); + if (obj->type != OBJ_COMMIT && is_branch(ref->name)) { + error(_("%s: not a commit"), ref->name); errors_found |= ERROR_REFS; } default_refs++; obj->flags |= USED; fsck_put_object_name(&fsck_walk_options, - oid, "%s", refname); + ref->oid, "%s", ref->name); mark_object_reachable(obj); return 0; @@ -580,13 +579,19 @@ static void get_default_heads(void) worktrees = get_worktrees(); for (p = worktrees; *p; p++) { struct worktree *wt = *p; - struct strbuf ref = STRBUF_INIT; + struct strbuf refname = STRBUF_INIT; - strbuf_worktree_ref(wt, &ref, "HEAD"); - fsck_head_link(ref.buf, &head_points_at, &head_oid); - if (head_points_at && !is_null_oid(&head_oid)) - fsck_handle_ref(ref.buf, NULL, &head_oid, 0, NULL); - strbuf_release(&ref); + strbuf_worktree_ref(wt, &refname, "HEAD"); + fsck_head_link(refname.buf, &head_points_at, &head_oid); + if (head_points_at && !is_null_oid(&head_oid)) { + struct reference ref = { + .name = refname.buf, + .oid = &head_oid, + }; + + fsck_handle_ref(&ref, NULL); + } + strbuf_release(&refname); if (include_reflogs) refs_for_each_reflog(get_worktree_ref_store(wt), diff --git a/builtin/gc.c b/builtin/gc.c index e19e13d9788076..9de5de175f6a40 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1100,24 +1100,21 @@ struct cg_auto_data { int limit; }; -static int dfs_on_ref(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, - void *cb_data) +static int dfs_on_ref(const struct reference *ref, void *cb_data) { struct cg_auto_data *data = (struct cg_auto_data *)cb_data; int result = 0; + const struct object_id *maybe_peeled = ref->oid; struct object_id peeled; struct commit_list *stack = NULL; struct commit *commit; - if (!peel_iterated_oid(the_repository, oid, &peeled)) - oid = &peeled; - if (odb_read_object_info(the_repository->objects, oid, NULL) != OBJ_COMMIT) + if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) + maybe_peeled = &peeled; + if (odb_read_object_info(the_repository->objects, maybe_peeled, NULL) != OBJ_COMMIT) return 0; - commit = lookup_commit(the_repository, oid); + commit = lookup_commit(the_repository, maybe_peeled); if (!commit) return 0; if (repo_parse_commit(the_repository, commit) || diff --git a/builtin/name-rev.c b/builtin/name-rev.c index 74512e54a38c45..615f7d1aae4987 100644 --- a/builtin/name-rev.c +++ b/builtin/name-rev.c @@ -339,10 +339,9 @@ static int cmp_by_tag_and_age(const void *a_, const void *b_) return a->taggerdate != b->taggerdate; } -static int name_ref(const char *path, const char *referent UNUSED, const struct object_id *oid, - int flags UNUSED, void *cb_data) +static int name_ref(const struct reference *ref, void *cb_data) { - struct object *o = parse_object(the_repository, oid); + struct object *o = parse_object(the_repository, ref->oid); struct name_ref_data *data = cb_data; int can_abbreviate_output = data->tags_only && data->name_only; int deref = 0; @@ -350,14 +349,14 @@ static int name_ref(const char *path, const char *referent UNUSED, const struct struct commit *commit = NULL; timestamp_t taggerdate = TIME_MAX; - if (data->tags_only && !starts_with(path, "refs/tags/")) + if (data->tags_only && !starts_with(ref->name, "refs/tags/")) return 0; if (data->exclude_filters.nr) { struct string_list_item *item; for_each_string_list_item(item, &data->exclude_filters) { - if (subpath_matches(path, item->string) >= 0) + if (subpath_matches(ref->name, item->string) >= 0) return 0; } } @@ -378,7 +377,7 @@ static int name_ref(const char *path, const char *referent UNUSED, const struct * shouldn't stop when seeing 'refs/tags/v1.4' matches * 'refs/tags/v*'. We should show it as 'v1.4'. */ - switch (subpath_matches(path, item->string)) { + switch (subpath_matches(ref->name, item->string)) { case -1: /* did not match */ break; case 0: /* matched fully */ @@ -406,13 +405,13 @@ static int name_ref(const char *path, const char *referent UNUSED, const struct } if (o && o->type == OBJ_COMMIT) { commit = (struct commit *)o; - from_tag = starts_with(path, "refs/tags/"); + from_tag = starts_with(ref->name, "refs/tags/"); if (taggerdate == TIME_MAX) taggerdate = commit->date; } - add_to_tip_table(oid, path, can_abbreviate_output, commit, taggerdate, - from_tag, deref); + add_to_tip_table(ref->oid, ref->name, can_abbreviate_output, + commit, taggerdate, from_tag, deref); return 0; } diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 5bdc44fb2de1fa..39633a0158e095 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -831,15 +831,14 @@ static enum write_one_status write_one(struct hashfile *f, return WRITE_ONE_WRITTEN; } -static int mark_tagged(const char *path UNUSED, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data UNUSED) +static int mark_tagged(const struct reference *ref, void *cb_data UNUSED) { struct object_id peeled; - struct object_entry *entry = packlist_find(&to_pack, oid); + struct object_entry *entry = packlist_find(&to_pack, ref->oid); if (entry) entry->tagged = 1; - if (!peel_iterated_oid(the_repository, oid, &peeled)) { + if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) { entry = packlist_find(&to_pack, &peeled); if (entry) entry->tagged = 1; @@ -3306,13 +3305,12 @@ static void add_tag_chain(const struct object_id *oid) } } -static int add_ref_tag(const char *tag UNUSED, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data UNUSED) +static int add_ref_tag(const struct reference *ref, void *cb_data UNUSED) { struct object_id peeled; - if (!peel_iterated_oid(the_repository, oid, &peeled) && obj_is_packed(&peeled)) - add_tag_chain(oid); + if (!peel_iterated_oid(the_repository, ref->oid, &peeled) && obj_is_packed(&peeled)) + add_tag_chain(ref->oid); return 0; } @@ -4533,19 +4531,16 @@ static void record_recent_commit(struct commit *commit, void *data UNUSED) oid_array_append(&recent_objects, &commit->object.oid); } -static int mark_bitmap_preferred_tip(const char *refname, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, - void *data UNUSED) +static int mark_bitmap_preferred_tip(const struct reference *ref, void *data UNUSED) { + const struct object_id *maybe_peeled = ref->oid; struct object_id peeled; struct object *object; - if (!peel_iterated_oid(the_repository, oid, &peeled)) - oid = &peeled; + if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) + maybe_peeled = &peeled; - object = parse_object_or_die(the_repository, oid, refname); + object = parse_object_or_die(the_repository, maybe_peeled, ref->name); if (object->type == OBJ_COMMIT) object->flags |= NEEDS_BITMAP; diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index c9288a9c7e382b..e8ee0e73217aae 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -305,13 +305,12 @@ static void show_ref(const char *path, const struct object_id *oid) } } -static int show_ref_cb(const char *path_full, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *data) +static int show_ref_cb(const struct reference *ref, void *data) { struct oidset *seen = data; - const char *path = strip_namespace(path_full); + const char *path = strip_namespace(ref->name); - if (ref_is_hidden(path, path_full, &hidden_refs)) + if (ref_is_hidden(path, ref->name, &hidden_refs)) return 0; /* @@ -320,13 +319,13 @@ static int show_ref_cb(const char *path_full, const char *referent UNUSED, const * transfer but will otherwise ignore them. */ if (!path) { - if (oidset_insert(seen, oid)) + if (oidset_insert(seen, ref->oid)) return 0; path = ".have"; } else { - oidset_insert(seen, oid); + oidset_insert(seen, ref->oid); } - show_ref(path, oid); + show_ref(path, ref->oid); return 0; } diff --git a/builtin/remote.c b/builtin/remote.c index 8a7ed4299a4b51..7ffc14ba15743a 100644 --- a/builtin/remote.c +++ b/builtin/remote.c @@ -570,17 +570,14 @@ struct branches_for_remote { struct known_remotes *keep; }; -static int add_branch_for_removal(const char *refname, - const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags UNUSED, void *cb_data) +static int add_branch_for_removal(const struct reference *ref, void *cb_data) { struct branches_for_remote *branches = cb_data; struct refspec_item refspec; struct known_remote *kr; memset(&refspec, 0, sizeof(refspec)); - refspec.dst = (char *)refname; + refspec.dst = (char *)ref->name; if (remote_find_tracking(branches->remote, &refspec)) return 0; free(refspec.src); @@ -588,7 +585,7 @@ static int add_branch_for_removal(const char *refname, /* don't delete a branch if another remote also uses it */ for (kr = branches->keep->list; kr; kr = kr->next) { memset(&refspec, 0, sizeof(refspec)); - refspec.dst = (char *)refname; + refspec.dst = (char *)ref->name; if (!remote_find_tracking(kr->remote, &refspec)) { free(refspec.src); return 0; @@ -596,16 +593,16 @@ static int add_branch_for_removal(const char *refname, } /* don't delete non-remote-tracking refs */ - if (!starts_with(refname, "refs/remotes/")) { + if (!starts_with(ref->name, "refs/remotes/")) { /* advise user how to delete local branches */ - if (starts_with(refname, "refs/heads/")) + if (starts_with(ref->name, "refs/heads/")) string_list_append(branches->skipped, - abbrev_branch(refname)); + abbrev_branch(ref->name)); /* silently skip over other non-remote refs */ return 0; } - string_list_append(branches->branches, refname); + string_list_append(branches->branches, ref->name); return 0; } @@ -713,18 +710,18 @@ static int rename_one_reflog(const char *old_refname, return error; } -static int rename_one_ref(const char *old_refname, const char *referent, - const struct object_id *oid, - int flags, void *cb_data) +static int rename_one_ref(const struct reference *ref, void *cb_data) { struct strbuf new_referent = STRBUF_INIT; struct strbuf new_refname = STRBUF_INIT; struct rename_info *rename = cb_data; + const struct object_id *oid = ref->oid; + const char *referent = ref->target; int error; - compute_renamed_ref(rename, old_refname, &new_refname); + compute_renamed_ref(rename, ref->name, &new_refname); - if (flags & REF_ISSYMREF) { + if (ref->flags & REF_ISSYMREF) { /* * Stupidly enough `referent` is not pointing to the immediate * target of a symref, but it's the recursively resolved value. @@ -732,25 +729,25 @@ static int rename_one_ref(const char *old_refname, const char *referent, * unborn symrefs don't have any value for the `referent` at all. */ referent = refs_resolve_ref_unsafe(get_main_ref_store(the_repository), - old_refname, RESOLVE_REF_NO_RECURSE, + ref->name, RESOLVE_REF_NO_RECURSE, NULL, NULL); compute_renamed_ref(rename, referent, &new_referent); oid = NULL; } - error = ref_transaction_delete(rename->transaction, old_refname, + error = ref_transaction_delete(rename->transaction, ref->name, oid, referent, REF_NO_DEREF, NULL, rename->err); if (error < 0) goto out; error = ref_transaction_update(rename->transaction, new_refname.buf, oid, null_oid(the_hash_algo), - (flags & REF_ISSYMREF) ? new_referent.buf : NULL, NULL, + (ref->flags & REF_ISSYMREF) ? new_referent.buf : NULL, NULL, REF_SKIP_CREATE_REFLOG | REF_NO_DEREF | REF_SKIP_OID_VERIFICATION, NULL, rename->err); if (error < 0) goto out; - error = rename_one_reflog(old_refname, oid, rename); + error = rename_one_reflog(ref->name, oid, rename); if (error < 0) goto out; @@ -1125,19 +1122,16 @@ static void free_remote_ref_states(struct ref_states *states) string_list_clear_func(&states->push, clear_push_info); } -static int append_ref_to_tracked_list(const char *refname, - const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags, void *cb_data) +static int append_ref_to_tracked_list(const struct reference *ref, void *cb_data) { struct ref_states *states = cb_data; struct refspec_item refspec; - if (flags & REF_ISSYMREF) + if (ref->flags & REF_ISSYMREF) return 0; memset(&refspec, 0, sizeof(refspec)); - refspec.dst = (char *)refname; + refspec.dst = (char *)ref->name; if (!remote_find_tracking(states->remote, &refspec)) { string_list_append(&states->tracked, abbrev_branch(refspec.src)); free(refspec.src); diff --git a/builtin/replace.c b/builtin/replace.c index 900b560a77d9d7..4c62c5ab58bd0a 100644 --- a/builtin/replace.c +++ b/builtin/replace.c @@ -47,30 +47,27 @@ struct show_data { enum replace_format format; }; -static int show_reference(const char *refname, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, void *cb_data) +static int show_reference(const struct reference *ref, void *cb_data) { struct show_data *data = cb_data; - if (!wildmatch(data->pattern, refname, 0)) { + if (!wildmatch(data->pattern, ref->name, 0)) { if (data->format == REPLACE_FORMAT_SHORT) - printf("%s\n", refname); + printf("%s\n", ref->name); else if (data->format == REPLACE_FORMAT_MEDIUM) - printf("%s -> %s\n", refname, oid_to_hex(oid)); + printf("%s -> %s\n", ref->name, oid_to_hex(ref->oid)); else { /* data->format == REPLACE_FORMAT_LONG */ struct object_id object; enum object_type obj_type, repl_type; - if (repo_get_oid(data->repo, refname, &object)) - return error(_("failed to resolve '%s' as a valid ref"), refname); + if (repo_get_oid(data->repo, ref->name, &object)) + return error(_("failed to resolve '%s' as a valid ref"), ref->name); obj_type = odb_read_object_info(data->repo->objects, &object, NULL); - repl_type = odb_read_object_info(data->repo->objects, oid, NULL); + repl_type = odb_read_object_info(data->repo->objects, ref->oid, NULL); - printf("%s (%s) -> %s (%s)\n", refname, type_name(obj_type), - oid_to_hex(oid), type_name(repl_type)); + printf("%s (%s) -> %s (%s)\n", ref->name, type_name(obj_type), + oid_to_hex(ref->oid), type_name(repl_type)); } } diff --git a/builtin/repo.c b/builtin/repo.c index 9d4749f79befa8..f26640bd6ea1e7 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -366,16 +366,13 @@ struct count_references_data { struct progress *progress; }; -static int count_references(const char *refname, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, void *cb_data) +static int count_references(const struct reference *ref, void *cb_data) { struct count_references_data *data = cb_data; struct ref_stats *stats = data->stats; size_t ref_count; - switch (ref_kind_from_refname(refname)) { + switch (ref_kind_from_refname(ref->name)) { case FILTER_REFS_BRANCHES: stats->branches++; break; @@ -396,7 +393,7 @@ static int count_references(const char *refname, * While iterating through references for counting, also add OIDs in * preparation for the path walk. */ - add_pending_oid(data->revs, NULL, oid, 0); + add_pending_oid(data->revs, NULL, ref->oid, 0); ref_count = get_total_reference_count(stats); display_progress(data->progress, ref_count); diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c index 9da92b990d074b..3578591b4f2a38 100644 --- a/builtin/rev-parse.c +++ b/builtin/rev-parse.c @@ -217,19 +217,17 @@ static int show_default(void) return 0; } -static int show_reference(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data UNUSED) +static int show_reference(const struct reference *ref, void *cb_data UNUSED) { - if (ref_excluded(&ref_excludes, refname)) + if (ref_excluded(&ref_excludes, ref->name)) return 0; - show_rev(NORMAL, oid, refname); + show_rev(NORMAL, ref->oid, ref->name); return 0; } -static int anti_reference(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data UNUSED) +static int anti_reference(const struct reference *ref, void *cb_data UNUSED) { - show_rev(REVERSED, oid, refname); + show_rev(REVERSED, ref->oid, ref->name); return 0; } diff --git a/builtin/show-branch.c b/builtin/show-branch.c index 441babf2e350f9..10475a6b5edb2b 100644 --- a/builtin/show-branch.c +++ b/builtin/show-branch.c @@ -413,34 +413,32 @@ static int append_ref(const char *refname, const struct object_id *oid, return 0; } -static int append_head_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data UNUSED) +static int append_head_ref(const struct reference *ref, void *cb_data UNUSED) { struct object_id tmp; int ofs = 11; - if (!starts_with(refname, "refs/heads/")) + if (!starts_with(ref->name, "refs/heads/")) return 0; /* If both heads/foo and tags/foo exists, get_sha1 would * get confused. */ - if (repo_get_oid(the_repository, refname + ofs, &tmp) || !oideq(&tmp, oid)) + if (repo_get_oid(the_repository, ref->name + ofs, &tmp) || !oideq(&tmp, ref->oid)) ofs = 5; - return append_ref(refname + ofs, oid, 0); + return append_ref(ref->name + ofs, ref->oid, 0); } -static int append_remote_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data UNUSED) +static int append_remote_ref(const struct reference *ref, void *cb_data UNUSED) { struct object_id tmp; int ofs = 13; - if (!starts_with(refname, "refs/remotes/")) + if (!starts_with(ref->name, "refs/remotes/")) return 0; /* If both heads/foo and tags/foo exists, get_sha1 would * get confused. */ - if (repo_get_oid(the_repository, refname + ofs, &tmp) || !oideq(&tmp, oid)) + if (repo_get_oid(the_repository, ref->name + ofs, &tmp) || !oideq(&tmp, ref->oid)) ofs = 5; - return append_ref(refname + ofs, oid, 0); + return append_ref(ref->name + ofs, ref->oid, 0); } static int append_tag_ref(const char *refname, const struct object_id *oid, @@ -454,27 +452,26 @@ static int append_tag_ref(const char *refname, const struct object_id *oid, static const char *match_ref_pattern = NULL; static int match_ref_slash = 0; -static int append_matching_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag, void *cb_data) +static int append_matching_ref(const struct reference *ref, void *cb_data) { /* we want to allow pattern hold/ to show all * branches under refs/heads/hold/, and v0.99.9? to show * refs/tags/v0.99.9a and friends. */ const char *tail; - int slash = count_slashes(refname); - for (tail = refname; *tail && match_ref_slash < slash; ) + int slash = count_slashes(ref->name); + for (tail = ref->name; *tail && match_ref_slash < slash; ) if (*tail++ == '/') slash--; if (!*tail) return 0; if (wildmatch(match_ref_pattern, tail, 0)) return 0; - if (starts_with(refname, "refs/heads/")) - return append_head_ref(refname, NULL, oid, flag, cb_data); - if (starts_with(refname, "refs/tags/")) - return append_tag_ref(refname, oid, flag, cb_data); - return append_ref(refname, oid, 0); + if (starts_with(ref->name, "refs/heads/")) + return append_head_ref(ref, cb_data); + if (starts_with(ref->name, "refs/tags/")) + return append_tag_ref(ref->name, ref->oid, ref->flags, cb_data); + return append_ref(ref->name, ref->oid, 0); } static void snarf_refs(int head, int remotes) diff --git a/builtin/show-ref.c b/builtin/show-ref.c index 0b6f9edf86c24f..4803b5e59865f6 100644 --- a/builtin/show-ref.c +++ b/builtin/show-ref.c @@ -66,26 +66,25 @@ struct show_ref_data { int show_head; }; -static int show_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cbdata) +static int show_ref(const struct reference *ref, void *cbdata) { struct show_ref_data *data = cbdata; - if (data->show_head && !strcmp(refname, "HEAD")) + if (data->show_head && !strcmp(ref->name, "HEAD")) goto match; if (data->patterns) { - int reflen = strlen(refname); + int reflen = strlen(ref->name); const char **p = data->patterns, *m; while ((m = *p++) != NULL) { int len = strlen(m); if (len > reflen) continue; - if (memcmp(m, refname + reflen - len, len)) + if (memcmp(m, ref->name + reflen - len, len)) continue; if (len == reflen) goto match; - if (refname[reflen - len - 1] == '/') + if (ref->name[reflen - len - 1] == '/') goto match; } return 0; @@ -94,18 +93,15 @@ static int show_ref(const char *refname, const char *referent UNUSED, const stru match: data->found_match++; - show_one(data->show_one_opts, refname, oid); + show_one(data->show_one_opts, ref->name, ref->oid); return 0; } -static int add_existing(const char *refname, - const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flag UNUSED, void *cbdata) +static int add_existing(const struct reference *ref, void *cbdata) { struct string_list *list = (struct string_list *)cbdata; - string_list_insert(list, refname); + string_list_insert(list, ref->name); return 0; } diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index fcd73abe5336a9..35f6cf735e51cd 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -593,16 +593,12 @@ static void print_status(unsigned int flags, char state, const char *path, printf("\n"); } -static int handle_submodule_head_ref(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, - void *cb_data) +static int handle_submodule_head_ref(const struct reference *ref, void *cb_data) { struct object_id *output = cb_data; - if (oid) - oidcpy(output, oid); + if (ref->oid) + oidcpy(output, ref->oid); return 0; } diff --git a/builtin/worktree.c b/builtin/worktree.c index 812774a5ca992c..b7f323b5e4d73e 100644 --- a/builtin/worktree.c +++ b/builtin/worktree.c @@ -635,11 +635,7 @@ static void print_preparing_worktree_line(int detach, * * Returns 0 on failure and non-zero on success. */ -static int first_valid_ref(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags UNUSED, - void *cb_data UNUSED) +static int first_valid_ref(const struct reference *ref UNUSED, void *cb_data UNUSED) { return 1; } diff --git a/commit-graph.c b/commit-graph.c index 474454db73d094..f91af416259c84 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1851,18 +1851,16 @@ struct refs_cb_data { struct progress *progress; }; -static int add_ref_to_set(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, void *cb_data) +static int add_ref_to_set(const struct reference *ref, void *cb_data) { + const struct object_id *maybe_peeled = ref->oid; struct object_id peeled; struct refs_cb_data *data = (struct refs_cb_data *)cb_data; - if (!peel_iterated_oid(data->repo, oid, &peeled)) - oid = &peeled; - if (odb_read_object_info(data->repo->objects, oid, NULL) == OBJ_COMMIT) - oidset_insert(data->commits, oid); + if (!peel_iterated_oid(data->repo, ref->oid, &peeled)) + maybe_peeled = &peeled; + if (odb_read_object_info(data->repo->objects, maybe_peeled, NULL) == OBJ_COMMIT) + oidset_insert(data->commits, maybe_peeled); display_progress(data->progress, oidset_size(data->commits)); diff --git a/delta-islands.c b/delta-islands.c index 36c94799d69d7a..7cfebc4162b0e0 100644 --- a/delta-islands.c +++ b/delta-islands.c @@ -390,8 +390,7 @@ static void add_ref_to_island(kh_str_t *remote_islands, const char *island_name, rl->hash += sha_core; } -static int find_island_for_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flags UNUSED, void *cb) +static int find_island_for_ref(const struct reference *ref, void *cb) { struct island_load_data *ild = cb; @@ -406,7 +405,7 @@ static int find_island_for_ref(const char *refname, const char *referent UNUSED, /* walk backwards to get last-one-wins ordering */ for (i = ild->nr - 1; i >= 0; i--) { - if (!regexec(&ild->rx[i], refname, + if (!regexec(&ild->rx[i], ref->name, ARRAY_SIZE(matches), matches, 0)) break; } @@ -428,10 +427,10 @@ static int find_island_for_ref(const char *refname, const char *referent UNUSED, if (island_name.len) strbuf_addch(&island_name, '-'); - strbuf_add(&island_name, refname + match->rm_so, match->rm_eo - match->rm_so); + strbuf_add(&island_name, ref->name + match->rm_so, match->rm_eo - match->rm_so); } - add_ref_to_island(ild->remote_islands, island_name.buf, oid); + add_ref_to_island(ild->remote_islands, island_name.buf, ref->oid); strbuf_release(&island_name); return 0; } diff --git a/fetch-pack.c b/fetch-pack.c index fe7a84bf2f97fa..78c45d4a155c89 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -188,13 +188,9 @@ static int rev_list_insert_ref(struct fetch_negotiator *negotiator, return 0; } -static int rev_list_insert_ref_oid(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, - void *cb_data) +static int rev_list_insert_ref_oid(const struct reference *ref, void *cb_data) { - return rev_list_insert_ref(cb_data, oid); + return rev_list_insert_ref(cb_data, ref->oid); } enum ack_type { @@ -616,13 +612,9 @@ static int mark_complete(const struct object_id *oid) return 0; } -static int mark_complete_oid(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, - void *cb_data UNUSED) +static int mark_complete_oid(const struct reference *ref, void *cb_data UNUSED) { - return mark_complete(oid); + return mark_complete(ref->oid); } static void mark_recent_complete_commits(struct fetch_pack_args *args, diff --git a/help.c b/help.c index 5854dd4a7e468b..20e114432d7f85 100644 --- a/help.c +++ b/help.c @@ -851,18 +851,16 @@ struct similar_ref_cb { struct string_list *similar_refs; }; -static int append_similar_ref(const char *refname, const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags UNUSED, void *cb_data) +static int append_similar_ref(const struct reference *ref, void *cb_data) { struct similar_ref_cb *cb = (struct similar_ref_cb *)(cb_data); - char *branch = strrchr(refname, '/') + 1; + char *branch = strrchr(ref->name, '/') + 1; /* A remote branch of the same name is deemed similar */ - if (starts_with(refname, "refs/remotes/") && + if (starts_with(ref->name, "refs/remotes/") && !strcmp(branch, cb->base_ref)) string_list_append_nodup(cb->similar_refs, - refs_shorten_unambiguous_ref(get_main_ref_store(the_repository), refname, 1)); + refs_shorten_unambiguous_ref(get_main_ref_store(the_repository), ref->name, 1)); return 0; } diff --git a/http-backend.c b/http-backend.c index 9084058f1e9f13..92e1733f14042c 100644 --- a/http-backend.c +++ b/http-backend.c @@ -513,18 +513,17 @@ static void run_service(const char **argv, int buffer_input) exit(1); } -static int show_text_ref(const char *name, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data) +static int show_text_ref(const struct reference *ref, void *cb_data) { - const char *name_nons = strip_namespace(name); + const char *name_nons = strip_namespace(ref->name); struct strbuf *buf = cb_data; - struct object *o = parse_object(the_repository, oid); + struct object *o = parse_object(the_repository, ref->oid); if (!o) return 0; - strbuf_addf(buf, "%s\t%s\n", oid_to_hex(oid), name_nons); + strbuf_addf(buf, "%s\t%s\n", oid_to_hex(ref->oid), name_nons); if (o->type == OBJ_TAG) { - o = deref_tag(the_repository, o, name, 0); + o = deref_tag(the_repository, o, ref->name, 0); if (!o) return 0; strbuf_addf(buf, "%s\t%s^{}\n", oid_to_hex(&o->oid), @@ -569,21 +568,20 @@ static void get_info_refs(struct strbuf *hdr, char *arg UNUSED) strbuf_release(&buf); } -static int show_head_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag, void *cb_data) +static int show_head_ref(const struct reference *ref, void *cb_data) { struct strbuf *buf = cb_data; - if (flag & REF_ISSYMREF) { + if (ref->flags & REF_ISSYMREF) { const char *target = refs_resolve_ref_unsafe(get_main_ref_store(the_repository), - refname, + ref->name, RESOLVE_REF_READING, NULL, NULL); if (target) strbuf_addf(buf, "ref: %s\n", strip_namespace(target)); } else { - strbuf_addf(buf, "%s\n", oid_to_hex(oid)); + strbuf_addf(buf, "%s\n", oid_to_hex(ref->oid)); } return 0; diff --git a/log-tree.c b/log-tree.c index 7d917f2a83d5ae..1729b0c201271b 100644 --- a/log-tree.c +++ b/log-tree.c @@ -147,9 +147,7 @@ static int ref_filter_match(const char *refname, return 1; } -static int add_ref_decoration(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flags UNUSED, - void *cb_data) +static int add_ref_decoration(const struct reference *ref, void *cb_data) { int i; struct object *obj; @@ -158,16 +156,16 @@ static int add_ref_decoration(const char *refname, const char *referent UNUSED, struct decoration_filter *filter = (struct decoration_filter *)cb_data; const char *git_replace_ref_base = ref_namespace[NAMESPACE_REPLACE].ref; - if (filter && !ref_filter_match(refname, filter)) + if (filter && !ref_filter_match(ref->name, filter)) return 0; - if (starts_with(refname, git_replace_ref_base)) { + if (starts_with(ref->name, git_replace_ref_base)) { struct object_id original_oid; if (!replace_refs_enabled(the_repository)) return 0; - if (get_oid_hex(refname + strlen(git_replace_ref_base), + if (get_oid_hex(ref->name + strlen(git_replace_ref_base), &original_oid)) { - warning("invalid replace ref %s", refname); + warning("invalid replace ref %s", ref->name); return 0; } obj = parse_object(the_repository, &original_oid); @@ -176,10 +174,10 @@ static int add_ref_decoration(const char *refname, const char *referent UNUSED, return 0; } - objtype = odb_read_object_info(the_repository->objects, oid, NULL); + objtype = odb_read_object_info(the_repository->objects, ref->oid, NULL); if (objtype < 0) return 0; - obj = lookup_object_by_type(the_repository, oid, objtype); + obj = lookup_object_by_type(the_repository, ref->oid, objtype); for (i = 0; i < ARRAY_SIZE(ref_namespace); i++) { struct ref_namespace_info *info = &ref_namespace[i]; @@ -187,24 +185,24 @@ static int add_ref_decoration(const char *refname, const char *referent UNUSED, if (!info->decoration) continue; if (info->exact) { - if (!strcmp(refname, info->ref)) { + if (!strcmp(ref->name, info->ref)) { deco_type = info->decoration; break; } - } else if (starts_with(refname, info->ref)) { + } else if (starts_with(ref->name, info->ref)) { deco_type = info->decoration; break; } } - add_name_decoration(deco_type, refname, obj); + add_name_decoration(deco_type, ref->name, obj); while (obj->type == OBJ_TAG) { if (!obj->parsed) parse_object(the_repository, &obj->oid); obj = ((struct tag *)obj)->tagged; if (!obj) break; - add_name_decoration(DECORATION_REF_TAG, refname, obj); + add_name_decoration(DECORATION_REF_TAG, ref->name, obj); } return 0; } diff --git a/ls-refs.c b/ls-refs.c index c47acde07f335b..64d02723691466 100644 --- a/ls-refs.c +++ b/ls-refs.c @@ -75,42 +75,42 @@ struct ls_refs_data { unsigned unborn : 1; }; -static int send_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag, void *cb_data) +static int send_ref(const struct reference *ref, void *cb_data) { struct ls_refs_data *data = cb_data; - const char *refname_nons = strip_namespace(refname); + const char *refname_nons = strip_namespace(ref->name); strbuf_reset(&data->buf); - if (ref_is_hidden(refname_nons, refname, &data->hidden_refs)) + if (ref_is_hidden(refname_nons, ref->name, &data->hidden_refs)) return 0; if (!ref_match(&data->prefixes, refname_nons)) return 0; - if (oid) - strbuf_addf(&data->buf, "%s %s", oid_to_hex(oid), refname_nons); + if (ref->oid) + strbuf_addf(&data->buf, "%s %s", oid_to_hex(ref->oid), refname_nons); else strbuf_addf(&data->buf, "unborn %s", refname_nons); - if (data->symrefs && flag & REF_ISSYMREF) { + if (data->symrefs && ref->flags & REF_ISSYMREF) { + int unused_flag; struct object_id unused; const char *symref_target = refs_resolve_ref_unsafe(get_main_ref_store(the_repository), - refname, + ref->name, 0, &unused, - &flag); + &unused_flag); if (!symref_target) - die("'%s' is a symref but it is not?", refname); + die("'%s' is a symref but it is not?", ref->name); strbuf_addf(&data->buf, " symref-target:%s", strip_namespace(symref_target)); } - if (data->peel && oid) { + if (data->peel && ref->oid) { struct object_id peeled; - if (!peel_iterated_oid(the_repository, oid, &peeled)) + if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) strbuf_addf(&data->buf, " peeled:%s", oid_to_hex(&peeled)); } @@ -131,9 +131,17 @@ static void send_possibly_unborn_head(struct ls_refs_data *data) if (!refs_resolve_ref_unsafe(get_main_ref_store(the_repository), namespaced.buf, 0, &oid, &flag)) return; /* bad ref */ oid_is_null = is_null_oid(&oid); + if (!oid_is_null || - (data->unborn && data->symrefs && (flag & REF_ISSYMREF))) - send_ref(namespaced.buf, NULL, oid_is_null ? NULL : &oid, flag, data); + (data->unborn && data->symrefs && (flag & REF_ISSYMREF))) { + struct reference ref = { + .name = namespaced.buf, + .oid = oid_is_null ? NULL : &oid, + .flags = flag, + }; + + send_ref(&ref, data); + } strbuf_release(&namespaced); } diff --git a/midx-write.c b/midx-write.c index c73010df6d3a4f..f4dd875747a4b6 100644 --- a/midx-write.c +++ b/midx-write.c @@ -697,28 +697,27 @@ static void prepare_midx_packing_data(struct packing_data *pdata, trace2_region_leave("midx", "prepare_midx_packing_data", ctx->repo); } -static int add_ref_to_pending(const char *refname, const char *referent UNUSED, - const struct object_id *oid, - int flag, void *cb_data) +static int add_ref_to_pending(const struct reference *ref, void *cb_data) { struct rev_info *revs = (struct rev_info*)cb_data; + const struct object_id *maybe_peeled = ref->oid; struct object_id peeled; struct object *object; - if ((flag & REF_ISSYMREF) && (flag & REF_ISBROKEN)) { - warning("symbolic ref is dangling: %s", refname); + if ((ref->flags & REF_ISSYMREF) && (ref->flags & REF_ISBROKEN)) { + warning("symbolic ref is dangling: %s", ref->name); return 0; } - if (!peel_iterated_oid(revs->repo, oid, &peeled)) - oid = &peeled; + if (!peel_iterated_oid(revs->repo, ref->oid, &peeled)) + maybe_peeled = &peeled; - object = parse_object_or_die(revs->repo, oid, refname); + object = parse_object_or_die(revs->repo, maybe_peeled, ref->name); if (object->type != OBJ_COMMIT) return 0; add_pending_object(revs, object, ""); - if (bitmap_is_preferred_refname(revs->repo, refname)) + if (bitmap_is_preferred_refname(revs->repo, ref->name)) object->flags |= NEEDS_BITMAP; return 0; } diff --git a/negotiator/default.c b/negotiator/default.c index c479da9b091570..116dedcf83035d 100644 --- a/negotiator/default.c +++ b/negotiator/default.c @@ -38,11 +38,10 @@ static void rev_list_push(struct negotiation_state *ns, } } -static int clear_marks(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, - void *cb_data UNUSED) +static int clear_marks(const struct reference *ref, void *cb_data UNUSED) { - struct object *o = deref_tag(the_repository, parse_object(the_repository, oid), refname, 0); + struct object *o = deref_tag(the_repository, parse_object(the_repository, ref->oid), + ref->name, 0); if (o && o->type == OBJ_COMMIT) clear_commit_marks((struct commit *)o, diff --git a/negotiator/skipping.c b/negotiator/skipping.c index 616df6bf3af51c..0a272130fb1b6d 100644 --- a/negotiator/skipping.c +++ b/negotiator/skipping.c @@ -75,11 +75,10 @@ static struct entry *rev_list_push(struct data *data, struct commit *commit, int return entry; } -static int clear_marks(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, - void *cb_data UNUSED) +static int clear_marks(const struct reference *ref, void *cb_data UNUSED) { - struct object *o = deref_tag(the_repository, parse_object(the_repository, oid), refname, 0); + struct object *o = deref_tag(the_repository, parse_object(the_repository, ref->oid), + ref->name, 0); if (o && o->type == OBJ_COMMIT) clear_commit_marks((struct commit *)o, diff --git a/notes.c b/notes.c index 9a2e9181fe67d8..8e00fd8c470dd2 100644 --- a/notes.c +++ b/notes.c @@ -938,13 +938,11 @@ int combine_notes_cat_sort_uniq(struct object_id *cur_oid, return ret; } -static int string_list_add_one_ref(const char *refname, const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flag UNUSED, void *cb) +static int string_list_add_one_ref(const struct reference *ref, void *cb) { struct string_list *refs = cb; - if (!unsorted_string_list_has_string(refs, refname)) - string_list_append(refs, refname); + if (!unsorted_string_list_has_string(refs, ref->name)) + string_list_append(refs, ref->name); return 0; } diff --git a/object-name.c b/object-name.c index f6902e140dd43e..7e8109f25fb839 100644 --- a/object-name.c +++ b/object-name.c @@ -1444,18 +1444,16 @@ struct handle_one_ref_cb { struct commit_list **list; }; -static int handle_one_ref(const char *path, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, - void *cb_data) +static int handle_one_ref(const struct reference *ref, void *cb_data) { struct handle_one_ref_cb *cb = cb_data; struct commit_list **list = cb->list; - struct object *object = parse_object(cb->repo, oid); + struct object *object = parse_object(cb->repo, ref->oid); if (!object) return 0; if (object->type == OBJ_TAG) { - object = deref_tag(cb->repo, object, path, - strlen(path)); + object = deref_tag(cb->repo, object, ref->name, + strlen(ref->name)); if (!object) return 0; } diff --git a/pseudo-merge.c b/pseudo-merge.c index 893b763fe45490..0abd51b42c185a 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -221,28 +221,25 @@ void load_pseudo_merges_from_config(struct repository *r, } } -static int find_pseudo_merge_group_for_ref(const char *refname, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, - void *_data) +static int find_pseudo_merge_group_for_ref(const struct reference *ref, void *_data) { struct bitmap_writer *writer = _data; + const struct object_id *maybe_peeled = ref->oid; struct object_id peeled; struct commit *c; uint32_t i; int has_bitmap; - if (!peel_iterated_oid(the_repository, oid, &peeled)) - oid = &peeled; + if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) + maybe_peeled = &peeled; - c = lookup_commit(the_repository, oid); + c = lookup_commit(the_repository, maybe_peeled); if (!c) return 0; - if (!packlist_find(writer->to_pack, oid)) + if (!packlist_find(writer->to_pack, maybe_peeled)) return 0; - has_bitmap = bitmap_writer_has_bitmapped_object_id(writer, oid); + has_bitmap = bitmap_writer_has_bitmapped_object_id(writer, maybe_peeled); for (i = 0; i < writer->pseudo_merge_groups.nr; i++) { struct pseudo_merge_group *group; @@ -252,7 +249,7 @@ static int find_pseudo_merge_group_for_ref(const char *refname, size_t j; group = writer->pseudo_merge_groups.items[i].util; - if (regexec(group->pattern, refname, ARRAY_SIZE(captures), + if (regexec(group->pattern, ref->name, ARRAY_SIZE(captures), captures, 0)) continue; @@ -269,7 +266,7 @@ static int find_pseudo_merge_group_for_ref(const char *refname, if (group_name.len) strbuf_addch(&group_name, '-'); - strbuf_add(&group_name, refname + match->rm_so, + strbuf_add(&group_name, ref->name + match->rm_so, match->rm_eo - match->rm_so); } diff --git a/reachable.c b/reachable.c index 22266db5233df7..b753c395530b6d 100644 --- a/reachable.c +++ b/reachable.c @@ -83,18 +83,17 @@ static void add_rebase_files(struct rev_info *revs) free_worktrees(worktrees); } -static int add_one_ref(const char *path, const char *referent UNUSED, const struct object_id *oid, - int flag, void *cb_data) +static int add_one_ref(const struct reference *ref, void *cb_data) { struct rev_info *revs = (struct rev_info *)cb_data; struct object *object; - if ((flag & REF_ISSYMREF) && (flag & REF_ISBROKEN)) { - warning("symbolic ref is dangling: %s", path); + if ((ref->flags & REF_ISSYMREF) && (ref->flags & REF_ISBROKEN)) { + warning("symbolic ref is dangling: %s", ref->name); return 0; } - object = parse_object_or_die(the_repository, oid, path); + object = parse_object_or_die(the_repository, ref->oid, ref->name); add_pending_object(revs, object, ""); return 0; diff --git a/ref-filter.c b/ref-filter.c index 30cc488d8ab659..6837fa60a9b2b5 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2954,14 +2954,15 @@ struct ref_filter_cbdata { * A call-back given to for_each_ref(). Filter refs and keep them for * later object processing. */ -static int filter_one(const char *refname, const char *referent, const struct object_id *oid, int flag, void *cb_data) +static int filter_one(const struct reference *ref, void *cb_data) { struct ref_filter_cbdata *ref_cbdata = cb_data; - struct ref_array_item *ref; + struct ref_array_item *item; - ref = apply_ref_filter(refname, referent, oid, flag, ref_cbdata->filter); - if (ref) - ref_array_append(ref_cbdata->array, ref); + item = apply_ref_filter(ref->name, ref->target, ref->oid, + ref->flags, ref_cbdata->filter); + if (item) + ref_array_append(ref_cbdata->array, item); return 0; } @@ -2990,17 +2991,18 @@ struct ref_filter_and_format_cbdata { } internal; }; -static int filter_and_format_one(const char *refname, const char *referent, const struct object_id *oid, int flag, void *cb_data) +static int filter_and_format_one(const struct reference *ref, void *cb_data) { struct ref_filter_and_format_cbdata *ref_cbdata = cb_data; - struct ref_array_item *ref; + struct ref_array_item *item; struct strbuf output = STRBUF_INIT, err = STRBUF_INIT; - ref = apply_ref_filter(refname, referent, oid, flag, ref_cbdata->filter); - if (!ref) + item = apply_ref_filter(ref->name, ref->target, ref->oid, + ref->flags, ref_cbdata->filter); + if (!item) return 0; - if (format_ref_array_item(ref, ref_cbdata->format, &output, &err)) + if (format_ref_array_item(item, ref_cbdata->format, &output, &err)) die("%s", err.buf); if (output.len || !ref_cbdata->format->array_opts.omit_empty) { @@ -3010,7 +3012,7 @@ static int filter_and_format_one(const char *refname, const char *referent, cons strbuf_release(&output); strbuf_release(&err); - free_array_item(ref); + free_array_item(item); /* * Increment the running count of refs that match the filter. If diff --git a/reflog.c b/reflog.c index 65ef259b4f5e1e..ac87e20c4f97ff 100644 --- a/reflog.c +++ b/reflog.c @@ -423,16 +423,13 @@ int should_expire_reflog_ent_verbose(struct object_id *ooid, return expire; } -static int push_tip_to_list(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flags, void *cb_data) +static int push_tip_to_list(const struct reference *ref, void *cb_data) { struct commit_list **list = cb_data; struct commit *tip_commit; - if (flags & REF_ISSYMREF) + if (ref->flags & REF_ISSYMREF) return 0; - tip_commit = lookup_commit_reference_gently(the_repository, oid, 1); + tip_commit = lookup_commit_reference_gently(the_repository, ref->oid, 1); if (!tip_commit) return 0; commit_list_insert(tip_commit, list); diff --git a/refs.c b/refs.c index 965381367e0e53..25f0579d610ddc 100644 --- a/refs.c +++ b/refs.c @@ -426,17 +426,19 @@ int refs_ref_exists(struct ref_store *refs, const char *refname) NULL, NULL); } -static int for_each_filter_refs(const char *refname, const char *referent, - const struct object_id *oid, - int flags, void *data) +static int for_each_filter_refs(const struct reference *ref, void *data) { struct for_each_ref_filter *filter = data; - if (wildmatch(filter->pattern, refname, 0)) + if (wildmatch(filter->pattern, ref->name, 0)) return 0; - if (filter->prefix) - skip_prefix(refname, filter->prefix, &refname); - return filter->fn(refname, referent, oid, flags, filter->cb_data); + if (filter->prefix) { + struct reference skipped = *ref; + skip_prefix(skipped.name, filter->prefix, &skipped.name); + return filter->fn(&skipped, filter->cb_data); + } else { + return filter->fn(ref, filter->cb_data); + } } struct warn_if_dangling_data { @@ -447,17 +449,15 @@ struct warn_if_dangling_data { int dry_run; }; -static int warn_if_dangling_symref(const char *refname, const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags, void *cb_data) +static int warn_if_dangling_symref(const struct reference *ref, void *cb_data) { struct warn_if_dangling_data *d = cb_data; const char *resolves_to, *msg; - if (!(flags & REF_ISSYMREF)) + if (!(ref->flags & REF_ISSYMREF)) return 0; - resolves_to = refs_resolve_ref_unsafe(d->refs, refname, 0, NULL, NULL); + resolves_to = refs_resolve_ref_unsafe(d->refs, ref->name, 0, NULL, NULL); if (!resolves_to || !string_list_has_string(d->refnames, resolves_to)) { return 0; @@ -466,7 +466,7 @@ static int warn_if_dangling_symref(const char *refname, const char *referent UNU msg = d->dry_run ? _("%s%s will become dangling after %s is deleted\n") : _("%s%s has become dangling after %s was deleted\n"); - fprintf(d->fp, msg, d->indent, refname, resolves_to); + fprintf(d->fp, msg, d->indent, ref->name, resolves_to); return 0; } @@ -507,8 +507,15 @@ int refs_head_ref_namespaced(struct ref_store *refs, each_ref_fn fn, void *cb_da int flag; strbuf_addf(&buf, "%sHEAD", get_git_namespace()); - if (!refs_read_ref_full(refs, buf.buf, RESOLVE_REF_READING, &oid, &flag)) - ret = fn(buf.buf, NULL, &oid, flag, cb_data); + if (!refs_read_ref_full(refs, buf.buf, RESOLVE_REF_READING, &oid, &flag)) { + struct reference ref = { + .name = buf.buf, + .oid = &oid, + .flags = flag, + }; + + ret = fn(&ref, cb_data); + } strbuf_release(&buf); return ret; @@ -1741,8 +1748,15 @@ int refs_head_ref(struct ref_store *refs, each_ref_fn fn, void *cb_data) int flag; if (refs_resolve_ref_unsafe(refs, "HEAD", RESOLVE_REF_READING, - &oid, &flag)) - return fn("HEAD", NULL, &oid, flag, cb_data); + &oid, &flag)) { + struct reference ref = { + .name = "HEAD", + .oid = &oid, + .flags = flag, + }; + + return fn(&ref, cb_data); + } return 0; } @@ -2753,14 +2767,10 @@ struct do_for_each_reflog_help { void *cb_data; }; -static int do_for_each_reflog_helper(const char *refname, - const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags UNUSED, - void *cb_data) +static int do_for_each_reflog_helper(const struct reference *ref, void *cb_data) { struct do_for_each_reflog_help *hp = cb_data; - return hp->fn(refname, hp->cb_data); + return hp->fn(ref->name, hp->cb_data); } int refs_for_each_reflog(struct ref_store *refs, each_reflog_fn fn, void *cb_data) @@ -2976,25 +2986,24 @@ struct migration_data { uint64_t index; }; -static int migrate_one_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flags, void *cb_data) +static int migrate_one_ref(const struct reference *ref, void *cb_data) { struct migration_data *data = cb_data; struct strbuf symref_target = STRBUF_INIT; int ret; - if (flags & REF_ISSYMREF) { - ret = refs_read_symbolic_ref(data->old_refs, refname, &symref_target); + if (ref->flags & REF_ISSYMREF) { + ret = refs_read_symbolic_ref(data->old_refs, ref->name, &symref_target); if (ret < 0) goto done; - ret = ref_transaction_update(data->transaction, refname, NULL, null_oid(the_hash_algo), + ret = ref_transaction_update(data->transaction, ref->name, NULL, null_oid(the_hash_algo), symref_target.buf, NULL, REF_SKIP_CREATE_REFLOG | REF_NO_DEREF, NULL, data->errbuf); if (ret < 0) goto done; } else { - ret = ref_transaction_create(data->transaction, refname, oid, NULL, + ret = ref_transaction_create(data->transaction, ref->name, ref->oid, NULL, REF_SKIP_CREATE_REFLOG | REF_SKIP_OID_VERIFICATION, NULL, data->errbuf); if (ret < 0) diff --git a/refs.h b/refs.h index 4e6bd63aa86c54..68d235438c2b32 100644 --- a/refs.h +++ b/refs.h @@ -355,14 +355,32 @@ struct ref_transaction; */ #define REF_BAD_NAME 0x08 +/* A reference passed to `for_each_ref()`-style callbacks. */ +struct reference { + /* The fully-qualified name of the reference. */ + const char *name; + + /* The target of a symbolic ref. `NULL` for direct references. */ + const char *target; + + /* + * The object ID of a reference. Either the direct object ID or the + * resolved object ID in the case of a symbolic ref. May be the zero + * object ID in case the symbolic ref cannot be resolved. + */ + const struct object_id *oid; + + /* A bitfield of `REF_` flags. */ + int flags; +}; + /* * The signature for the callback function for the for_each_*() - * functions below. The memory pointed to by the refname and oid - * arguments is only guaranteed to be valid for the duration of a + * functions below. The memory pointed to by the `struct reference` + * argument is only guaranteed to be valid for the duration of a * single callback invocation. */ -typedef int each_ref_fn(const char *refname, const char *referent, - const struct object_id *oid, int flags, void *cb_data); +typedef int each_ref_fn(const struct reference *ref, void *cb_data); /* * The following functions invoke the specified callback function for diff --git a/refs/files-backend.c b/refs/files-backend.c index 8d7007f4aaa9da..eb3142f8f2dd32 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -3150,14 +3150,11 @@ static int parse_and_write_reflog(struct files_ref_store *refs, return 0; } -static int ref_present(const char *refname, const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags UNUSED, - void *cb_data) +static int ref_present(const struct reference *ref, void *cb_data) { struct string_list *affected_refnames = cb_data; - return string_list_has_string(affected_refnames, refname); + return string_list_has_string(affected_refnames, ref->name); } static int files_transaction_finish_initial(struct files_ref_store *refs, diff --git a/refs/iterator.c b/refs/iterator.c index 17ef841d8a3013..7f2e718f1c9107 100644 --- a/refs/iterator.c +++ b/refs/iterator.c @@ -476,7 +476,14 @@ int do_for_each_ref_iterator(struct ref_iterator *iter, current_ref_iter = iter; while ((ok = ref_iterator_advance(iter)) == ITER_OK) { - retval = fn(iter->refname, iter->referent, iter->oid, iter->flags, cb_data); + struct reference ref = { + .name = iter->refname, + .target = iter->referent, + .oid = iter->oid, + .flags = iter->flags, + }; + + retval = fn(&ref, cb_data); if (retval) goto out; } diff --git a/remote.c b/remote.c index df9675cd330ed1..59b371512084eb 100644 --- a/remote.c +++ b/remote.c @@ -2315,21 +2315,19 @@ int format_tracking_info(struct branch *branch, struct strbuf *sb, return 1; } -static int one_local_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, - void *cb_data) +static int one_local_ref(const struct reference *ref, void *cb_data) { struct ref ***local_tail = cb_data; - struct ref *ref; + struct ref *local_ref; /* we already know it starts with refs/ to get here */ - if (check_refname_format(refname + 5, 0)) + if (check_refname_format(ref->name + 5, 0)) return 0; - ref = alloc_ref(refname); - oidcpy(&ref->new_oid, oid); - **local_tail = ref; - *local_tail = &ref->next; + local_ref = alloc_ref(ref->name); + oidcpy(&local_ref->new_oid, ref->oid); + **local_tail = local_ref; + *local_tail = &local_ref->next; return 0; } @@ -2402,15 +2400,14 @@ struct stale_heads_info { struct refspec *rs; }; -static int get_stale_heads_cb(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flags, void *cb_data) +static int get_stale_heads_cb(const struct reference *ref, void *cb_data) { struct stale_heads_info *info = cb_data; struct string_list matches = STRING_LIST_INIT_DUP; struct refspec_item query; int i, stale = 1; memset(&query, 0, sizeof(struct refspec_item)); - query.dst = (char *)refname; + query.dst = (char *)ref->name; refspec_find_all_matches(info->rs, &query, &matches); if (matches.nr == 0) @@ -2423,7 +2420,7 @@ static int get_stale_heads_cb(const char *refname, const char *referent UNUSED, * overlapping refspecs, we need to go over all of the * matching refs. */ - if (flags & REF_ISSYMREF) + if (ref->flags & REF_ISSYMREF) goto clean_exit; for (i = 0; stale && i < matches.nr; i++) @@ -2431,8 +2428,8 @@ static int get_stale_heads_cb(const char *refname, const char *referent UNUSED, stale = 0; if (stale) { - struct ref *ref = make_linked_ref(refname, &info->stale_refs_tail); - oidcpy(&ref->new_oid, oid); + struct ref *linked_ref = make_linked_ref(ref->name, &info->stale_refs_tail); + oidcpy(&linked_ref->new_oid, ref->oid); } clean_exit: diff --git a/repack-midx.c b/repack-midx.c index 6f6202c5bccd89..349f7e20b53f25 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -16,25 +16,23 @@ struct midx_snapshot_ref_data { int preferred; }; -static int midx_snapshot_ref_one(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, void *_data) +static int midx_snapshot_ref_one(const struct reference *ref, void *_data) { struct midx_snapshot_ref_data *data = _data; + const struct object_id *maybe_peeled = ref->oid; struct object_id peeled; - if (!peel_iterated_oid(data->repo, oid, &peeled)) - oid = &peeled; + if (!peel_iterated_oid(data->repo, ref->oid, &peeled)) + maybe_peeled = &peeled; - if (oidset_insert(&data->seen, oid)) + if (oidset_insert(&data->seen, maybe_peeled)) return 0; /* already seen */ - if (odb_read_object_info(data->repo->objects, oid, NULL) != OBJ_COMMIT) + if (odb_read_object_info(data->repo->objects, maybe_peeled, NULL) != OBJ_COMMIT) return 0; fprintf(data->f->fp, "%s%s\n", data->preferred ? "+" : "", - oid_to_hex(oid)); + oid_to_hex(maybe_peeled)); return 0; } diff --git a/replace-object.c b/replace-object.c index 3eae0510745eae..03d0f1f083bed9 100644 --- a/replace-object.c +++ b/replace-object.c @@ -8,31 +8,27 @@ #include "repository.h" #include "commit.h" -static int register_replace_ref(const char *refname, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, - void *cb_data) +static int register_replace_ref(const struct reference *ref, void *cb_data) { struct repository *r = cb_data; /* Get sha1 from refname */ - const char *slash = strrchr(refname, '/'); - const char *hash = slash ? slash + 1 : refname; + const char *slash = strrchr(ref->name, '/'); + const char *hash = slash ? slash + 1 : ref->name; struct replace_object *repl_obj = xmalloc(sizeof(*repl_obj)); if (get_oid_hex_algop(hash, &repl_obj->original.oid, r->hash_algo)) { free(repl_obj); - warning(_("bad replace ref name: %s"), refname); + warning(_("bad replace ref name: %s"), ref->name); return 0; } /* Copy sha1 from the read ref */ - oidcpy(&repl_obj->replacement, oid); + oidcpy(&repl_obj->replacement, ref->oid); /* Register new object */ if (oidmap_put(&r->objects->replace_map, repl_obj)) - die(_("duplicate replace ref: %s"), refname); + die(_("duplicate replace ref: %s"), ref->name); return 0; } diff --git a/revision.c b/revision.c index cf5e6c1ec9e4e1..5f0850ae5c9c1a 100644 --- a/revision.c +++ b/revision.c @@ -1644,19 +1644,17 @@ struct all_refs_cb { struct worktree *wt; }; -static int handle_one_ref(const char *path, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, - void *cb_data) +static int handle_one_ref(const struct reference *ref, void *cb_data) { struct all_refs_cb *cb = cb_data; struct object *object; - if (ref_excluded(&cb->all_revs->ref_excludes, path)) + if (ref_excluded(&cb->all_revs->ref_excludes, ref->name)) return 0; - object = get_reference(cb->all_revs, path, oid, cb->all_flags); - add_rev_cmdline(cb->all_revs, object, path, REV_CMD_REF, cb->all_flags); - add_pending_object(cb->all_revs, object, path); + object = get_reference(cb->all_revs, ref->name, ref->oid, cb->all_flags); + add_rev_cmdline(cb->all_revs, object, ref->name, REV_CMD_REF, cb->all_flags); + add_pending_object(cb->all_revs, object, ref->name); return 0; } diff --git a/server-info.c b/server-info.c index 1d33de821e9f5e..0a07c722e8bfe5 100644 --- a/server-info.c +++ b/server-info.c @@ -148,23 +148,21 @@ static int update_info_file(struct repository *r, char *path, return ret; } -static int add_info_ref(const char *path, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, - void *cb_data) +static int add_info_ref(const struct reference *ref, void *cb_data) { struct update_info_ctx *uic = cb_data; - struct object *o = parse_object(uic->repo, oid); + struct object *o = parse_object(uic->repo, ref->oid); if (!o) return -1; - if (uic_printf(uic, "%s %s\n", oid_to_hex(oid), path) < 0) + if (uic_printf(uic, "%s %s\n", oid_to_hex(ref->oid), ref->name) < 0) return -1; if (o->type == OBJ_TAG) { - o = deref_tag(uic->repo, o, path, 0); + o = deref_tag(uic->repo, o, ref->name, 0); if (o) if (uic_printf(uic, "%s %s^{}\n", - oid_to_hex(&o->oid), path) < 0) + oid_to_hex(&o->oid), ref->name) < 0) return -1; } return 0; diff --git a/shallow.c b/shallow.c index d9cd4e219cb07d..55b9cd9d3f29f1 100644 --- a/shallow.c +++ b/shallow.c @@ -626,14 +626,10 @@ static void paint_down(struct paint_info *info, const struct object_id *oid, free(tmp); } -static int mark_uninteresting(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, - void *cb_data UNUSED) +static int mark_uninteresting(const struct reference *ref, void *cb_data UNUSED) { struct commit *commit = lookup_commit_reference_gently(the_repository, - oid, 1); + ref->oid, 1); if (!commit) return 0; commit->object.flags |= UNINTERESTING; @@ -742,16 +738,12 @@ struct commit_array { size_t nr, alloc; }; -static int add_ref(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, - void *cb_data) +static int add_ref(const struct reference *ref, void *cb_data) { struct commit_array *ca = cb_data; ALLOC_GROW(ca->commits, ca->nr + 1, ca->alloc); ca->commits[ca->nr] = lookup_commit_reference_gently(the_repository, - oid, 1); + ref->oid, 1); if (ca->commits[ca->nr]) ca->nr++; return 0; diff --git a/submodule.c b/submodule.c index 35c55155f7bf83..40a5c6fb9d1545 100644 --- a/submodule.c +++ b/submodule.c @@ -934,10 +934,7 @@ static void free_submodules_data(struct string_list *submodules) string_list_clear(submodules, 1); } -static int has_remote(const char *refname UNUSED, - const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flags UNUSED, void *cb_data UNUSED) +static int has_remote(const struct reference *ref UNUSED, void *cb_data UNUSED) { return 1; } @@ -1255,13 +1252,10 @@ int push_unpushed_submodules(struct repository *r, return ret; } -static int append_oid_to_array(const char *ref UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flags UNUSED, void *data) +static int append_oid_to_array(const struct reference *ref, void *data) { struct oid_array *array = data; - oid_array_append(array, oid); + oid_array_append(array, ref->oid); return 0; } diff --git a/t/helper/test-ref-store.c b/t/helper/test-ref-store.c index 83b06d39a36235..b1215947c5e67a 100644 --- a/t/helper/test-ref-store.c +++ b/t/helper/test-ref-store.c @@ -154,10 +154,9 @@ static int cmd_rename_ref(struct ref_store *refs, const char **argv) return refs_rename_ref(refs, oldref, newref, logmsg); } -static int each_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flags, void *cb_data UNUSED) +static int each_ref(const struct reference *ref, void *cb_data UNUSED) { - printf("%s %s 0x%x\n", oid_to_hex(oid), refname, flags); + printf("%s %s 0x%x\n", oid_to_hex(ref->oid), ref->name, ref->flags); return 0; } diff --git a/upload-pack.c b/upload-pack.c index 1e87ae95593063..0d563ae74e92be 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -870,8 +870,8 @@ static void send_unshallow(struct upload_pack_data *data) } } -static int check_ref(const char *refname_full, const char *referent UNUSED, const struct object_id *oid, - int flag, void *cb_data); +static int check_ref(const struct reference *ref, void *cb_data); + static void deepen(struct upload_pack_data *data, int depth) { if (depth == INFINITE_DEPTH && !is_repository_shallow(the_repository)) { @@ -1224,13 +1224,12 @@ static int mark_our_ref(const char *refname, const char *refname_full, return 0; } -static int check_ref(const char *refname_full, const char *referent UNUSED,const struct object_id *oid, - int flag UNUSED, void *cb_data) +static int check_ref(const struct reference *ref, void *cb_data) { - const char *refname = strip_namespace(refname_full); + const char *refname = strip_namespace(ref->name); struct upload_pack_data *data = cb_data; - mark_our_ref(refname, refname_full, oid, &data->hidden_refs); + mark_our_ref(refname, ref->name, ref->oid, &data->hidden_refs); return 0; } @@ -1292,27 +1291,25 @@ static void write_v0_ref(struct upload_pack_data *data, return; } -static int send_ref(const char *refname, const char *referent UNUSED, const struct object_id *oid, - int flag UNUSED, void *cb_data) +static int send_ref(const struct reference *ref, void *cb_data) { - write_v0_ref(cb_data, refname, strip_namespace(refname), oid); + write_v0_ref(cb_data, ref->name, strip_namespace(ref->name), ref->oid); return 0; } -static int find_symref(const char *refname, const char *referent UNUSED, - const struct object_id *oid UNUSED, - int flag, void *cb_data) +static int find_symref(const struct reference *ref, void *cb_data) { const char *symref_target; struct string_list_item *item; + int flag; - if ((flag & REF_ISSYMREF) == 0) + if ((ref->flags & REF_ISSYMREF) == 0) return 0; symref_target = refs_resolve_ref_unsafe(get_main_ref_store(the_repository), - refname, 0, NULL, &flag); + ref->name, 0, NULL, &flag); if (!symref_target || (flag & REF_ISSYMREF) == 0) - die("'%s' is a symref but it is not?", refname); - item = string_list_append(cb_data, strip_namespace(refname)); + die("'%s' is a symref but it is not?", ref->name); + item = string_list_append(cb_data, strip_namespace(ref->name)); item->util = xstrdup(strip_namespace(symref_target)); return 0; } diff --git a/walker.c b/walker.c index 80737545172bbd..409b646578a3d4 100644 --- a/walker.c +++ b/walker.c @@ -226,14 +226,10 @@ static int interpret_target(struct walker *walker, char *target, struct object_i return -1; } -static int mark_complete(const char *path UNUSED, - const char *referent UNUSED, - const struct object_id *oid, - int flag UNUSED, - void *cb_data UNUSED) +static int mark_complete(const struct reference *ref, void *cb_data UNUSED) { struct commit *commit = lookup_commit_reference_gently(the_repository, - oid, 1); + ref->oid, 1); if (commit) { commit->object.flags |= COMPLETE; diff --git a/worktree.c b/worktree.c index a2a5f51f29fca2..9308389cb6f029 100644 --- a/worktree.c +++ b/worktree.c @@ -595,8 +595,15 @@ int other_head_refs(each_ref_fn fn, void *cb_data) if (refs_resolve_ref_unsafe(get_main_ref_store(the_repository), refname.buf, RESOLVE_REF_READING, - &oid, &flag)) - ret = fn(refname.buf, NULL, &oid, flag, cb_data); + &oid, &flag)) { + struct reference ref = { + .name = refname.buf, + .oid = &oid, + .flags = flag, + }; + + ret = fn(&ref, cb_data); + } if (ret) break; } From 89baa52da612dde6da031acfa2cb957d4297d544 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:11 +0200 Subject: [PATCH 025/553] refs: introduce `.ref` field for the base iterator The base iterator has a couple of fields that tracks the name, target, object ID and flags for the current reference. Due to this design we have to create a new `struct reference` whenever we want to hand over that reference to the callback function, which is tedious and not very efficient. Convert the structure to instead contain a `struct reference` as member. This member is expected to be populated by the implementations of the iterator and is handed over to the callback directly. While at it, simplify `should_pack_ref()` to take a `struct reference` directly instead of passing its respective fields. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 8 +++---- refs/debug.c | 8 +++---- refs/files-backend.c | 47 ++++++++++++++++++----------------------- refs/iterator.c | 39 +++++++++++----------------------- refs/packed-backend.c | 46 ++++++++++++++++++++-------------------- refs/ref-cache.c | 10 ++++----- refs/refs-internal.h | 5 +---- refs/reftable-backend.c | 12 +++++------ 8 files changed, 75 insertions(+), 100 deletions(-) diff --git a/refs.c b/refs.c index 25f0579d610ddc..f96cf43b128a27 100644 --- a/refs.c +++ b/refs.c @@ -2327,8 +2327,8 @@ int refs_optimize(struct ref_store *refs, struct pack_refs_opts *opts) int peel_iterated_oid(struct repository *r, const struct object_id *base, struct object_id *peeled) { if (current_ref_iter && - (current_ref_iter->oid == base || - oideq(current_ref_iter->oid, base))) + (current_ref_iter->ref.oid == base || + oideq(current_ref_iter->ref.oid, base))) return ref_iterator_peel(current_ref_iter, peeled); return peel_object(r, base, peeled) ? -1 : 0; @@ -2703,7 +2703,7 @@ enum ref_transaction_error refs_verify_refnames_available(struct ref_store *refs while ((ok = ref_iterator_advance(iter)) == ITER_OK) { if (skip && - string_list_has_string(skip, iter->refname)) + string_list_has_string(skip, iter->ref.name)) continue; if (transaction && ref_transaction_maybe_set_rejected( @@ -2712,7 +2712,7 @@ enum ref_transaction_error refs_verify_refnames_available(struct ref_store *refs continue; strbuf_addf(err, _("'%s' exists; cannot create '%s'"), - iter->refname, refname); + iter->ref.name, refname); goto cleanup; } diff --git a/refs/debug.c b/refs/debug.c index 697adbd0dc3f65..67718bd1f49f1f 100644 --- a/refs/debug.c +++ b/refs/debug.c @@ -160,11 +160,9 @@ static int debug_ref_iterator_advance(struct ref_iterator *ref_iterator) trace_printf_key(&trace_refs, "iterator_advance: (%d)\n", res); else trace_printf_key(&trace_refs, "iterator_advance: %s (0)\n", - diter->iter->refname); + diter->iter->ref.name); - diter->base.refname = diter->iter->refname; - diter->base.oid = diter->iter->oid; - diter->base.flags = diter->iter->flags; + diter->base.ref = diter->iter->ref; return res; } @@ -185,7 +183,7 @@ static int debug_ref_iterator_peel(struct ref_iterator *ref_iterator, struct debug_ref_iterator *diter = (struct debug_ref_iterator *)ref_iterator; int res = diter->iter->vtable->peel(diter->iter, peeled); - trace_printf_key(&trace_refs, "iterator_peel: %s: %d\n", diter->iter->refname, res); + trace_printf_key(&trace_refs, "iterator_peel: %s: %d\n", diter->iter->ref.name, res); return res; } diff --git a/refs/files-backend.c b/refs/files-backend.c index eb3142f8f2dd32..fac53fa052dd22 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -961,26 +961,23 @@ static int files_ref_iterator_advance(struct ref_iterator *ref_iterator) while ((ok = ref_iterator_advance(iter->iter0)) == ITER_OK) { if (iter->flags & DO_FOR_EACH_PER_WORKTREE_ONLY && - parse_worktree_ref(iter->iter0->refname, NULL, NULL, + parse_worktree_ref(iter->iter0->ref.name, NULL, NULL, NULL) != REF_WORKTREE_CURRENT) continue; if ((iter->flags & DO_FOR_EACH_OMIT_DANGLING_SYMREFS) && - (iter->iter0->flags & REF_ISSYMREF) && - (iter->iter0->flags & REF_ISBROKEN)) + (iter->iter0->ref.flags & REF_ISSYMREF) && + (iter->iter0->ref.flags & REF_ISBROKEN)) continue; if (!(iter->flags & DO_FOR_EACH_INCLUDE_BROKEN) && - !ref_resolves_to_object(iter->iter0->refname, + !ref_resolves_to_object(iter->iter0->ref.name, iter->repo, - iter->iter0->oid, - iter->iter0->flags)) + iter->iter0->ref.oid, + iter->iter0->ref.flags)) continue; - iter->base.refname = iter->iter0->refname; - iter->base.oid = iter->iter0->oid; - iter->base.flags = iter->iter0->flags; - iter->base.referent = iter->iter0->referent; + iter->base.ref = iter->iter0->ref; return ITER_OK; } @@ -1367,30 +1364,29 @@ static void prune_refs(struct files_ref_store *refs, struct ref_to_prune **refs_ * Return true if the specified reference should be packed. */ static int should_pack_ref(struct files_ref_store *refs, - const char *refname, - const struct object_id *oid, unsigned int ref_flags, + const struct reference *ref, struct pack_refs_opts *opts) { struct string_list_item *item; /* Do not pack per-worktree refs: */ - if (parse_worktree_ref(refname, NULL, NULL, NULL) != + if (parse_worktree_ref(ref->name, NULL, NULL, NULL) != REF_WORKTREE_SHARED) return 0; /* Do not pack symbolic refs: */ - if (ref_flags & REF_ISSYMREF) + if (ref->flags & REF_ISSYMREF) return 0; /* Do not pack broken refs: */ - if (!ref_resolves_to_object(refname, refs->base.repo, oid, ref_flags)) + if (!ref_resolves_to_object(ref->name, refs->base.repo, ref->oid, ref->flags)) return 0; - if (ref_excluded(opts->exclusions, refname)) + if (ref_excluded(opts->exclusions, ref->name)) return 0; for_each_string_list_item(item, opts->includes) - if (!wildmatch(item->string, refname, 0)) + if (!wildmatch(item->string, ref->name, 0)) return 1; return 0; @@ -1443,8 +1439,7 @@ static int should_pack_refs(struct files_ref_store *refs, iter = cache_ref_iterator_begin(get_loose_ref_cache(refs, 0), NULL, refs->base.repo, 0); while ((ret = ref_iterator_advance(iter)) == ITER_OK) { - if (should_pack_ref(refs, iter->refname, iter->oid, - iter->flags, opts)) + if (should_pack_ref(refs, &iter->ref, opts)) refcount++; if (refcount >= limit) { ref_iterator_free(iter); @@ -1489,24 +1484,24 @@ static int files_pack_refs(struct ref_store *ref_store, * in the packed ref cache. If the reference should be * pruned, also add it to refs_to_prune. */ - if (!should_pack_ref(refs, iter->refname, iter->oid, iter->flags, opts)) + if (!should_pack_ref(refs, &iter->ref, opts)) continue; /* * Add a reference creation for this reference to the * packed-refs transaction: */ - if (ref_transaction_update(transaction, iter->refname, - iter->oid, NULL, NULL, NULL, + if (ref_transaction_update(transaction, iter->ref.name, + iter->ref.oid, NULL, NULL, NULL, REF_NO_DEREF, NULL, &err)) die("failure preparing to create packed reference %s: %s", - iter->refname, err.buf); + iter->ref.name, err.buf); /* Schedule the loose reference for pruning if requested. */ if ((opts->flags & PACK_REFS_PRUNE)) { struct ref_to_prune *n; - FLEX_ALLOC_STR(n, name, iter->refname); - oidcpy(&n->oid, iter->oid); + FLEX_ALLOC_STR(n, name, iter->ref.name); + oidcpy(&n->oid, iter->ref.oid); n->next = refs_to_prune; refs_to_prune = n; } @@ -2379,7 +2374,7 @@ static int files_reflog_iterator_advance(struct ref_iterator *ref_iterator) REFNAME_ALLOW_ONELEVEL)) continue; - iter->base.refname = diter->relative_path; + iter->base.ref.name = diter->relative_path; return ITER_OK; } diff --git a/refs/iterator.c b/refs/iterator.c index 7f2e718f1c9107..fe5980e1b6c96b 100644 --- a/refs/iterator.c +++ b/refs/iterator.c @@ -41,10 +41,7 @@ void base_ref_iterator_init(struct ref_iterator *iter, struct ref_iterator_vtable *vtable) { iter->vtable = vtable; - iter->refname = NULL; - iter->referent = NULL; - iter->oid = NULL; - iter->flags = 0; + memset(&iter->ref, 0, sizeof(iter->ref)); } struct empty_ref_iterator { @@ -127,8 +124,8 @@ enum iterator_selection ref_iterator_select(struct ref_iterator *iter_worktree, * latter. */ if (iter_worktree) { - int cmp = strcmp(iter_worktree->refname, - iter_common->refname); + int cmp = strcmp(iter_worktree->ref.name, + iter_common->ref.name); if (cmp < 0) return ITER_SELECT_0; else if (!cmp) @@ -139,7 +136,7 @@ enum iterator_selection ref_iterator_select(struct ref_iterator *iter_worktree, * We now know that the lexicographically-next ref is a common * ref. When the common ref is a shared one we return it. */ - if (parse_worktree_ref(iter_common->refname, NULL, NULL, + if (parse_worktree_ref(iter_common->ref.name, NULL, NULL, NULL) == REF_WORKTREE_SHARED) return ITER_SELECT_1; @@ -212,10 +209,7 @@ static int merge_ref_iterator_advance(struct ref_iterator *ref_iterator) } if (selection & ITER_YIELD_CURRENT) { - iter->base.referent = (*iter->current)->referent; - iter->base.refname = (*iter->current)->refname; - iter->base.oid = (*iter->current)->oid; - iter->base.flags = (*iter->current)->flags; + iter->base.ref = (*iter->current)->ref; return ITER_OK; } } @@ -313,7 +307,7 @@ static enum iterator_selection overlay_iterator_select( else if (!front) return ITER_SELECT_1; - cmp = strcmp(front->refname, back->refname); + cmp = strcmp(front->ref.name, back->ref.name); if (cmp < 0) return ITER_SELECT_0; @@ -371,7 +365,7 @@ static int prefix_ref_iterator_advance(struct ref_iterator *ref_iterator) int ok; while ((ok = ref_iterator_advance(iter->iter0)) == ITER_OK) { - int cmp = compare_prefix(iter->iter0->refname, iter->prefix); + int cmp = compare_prefix(iter->iter0->ref.name, iter->prefix); if (cmp < 0) continue; /* @@ -382,6 +376,8 @@ static int prefix_ref_iterator_advance(struct ref_iterator *ref_iterator) if (cmp > 0) return ITER_DONE; + iter->base.ref = iter->iter0->ref; + if (iter->trim) { /* * It is nonsense to trim off characters that @@ -392,15 +388,11 @@ static int prefix_ref_iterator_advance(struct ref_iterator *ref_iterator) * one character left in the refname after * trimming, report it as a bug: */ - if (strlen(iter->iter0->refname) <= iter->trim) + if (strlen(iter->base.ref.name) <= iter->trim) BUG("attempt to trim too many characters"); - iter->base.refname = iter->iter0->refname + iter->trim; - } else { - iter->base.refname = iter->iter0->refname; + iter->base.ref.name += iter->trim; } - iter->base.oid = iter->iter0->oid; - iter->base.flags = iter->iter0->flags; return ITER_OK; } @@ -476,14 +468,7 @@ int do_for_each_ref_iterator(struct ref_iterator *iter, current_ref_iter = iter; while ((ok = ref_iterator_advance(iter)) == ITER_OK) { - struct reference ref = { - .name = iter->refname, - .target = iter->referent, - .oid = iter->oid, - .flags = iter->flags, - }; - - retval = fn(&ref, cb_data); + retval = fn(&iter->ref, cb_data); if (retval) goto out; } diff --git a/refs/packed-backend.c b/refs/packed-backend.c index a8c22a0a7ff8cd..7987acdc96a14b 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -908,7 +908,7 @@ static int next_record(struct packed_ref_iterator *iter) if (iter->pos == iter->eof) return ITER_DONE; - iter->base.flags = REF_ISPACKED; + iter->base.ref.flags = REF_ISPACKED; p = iter->pos; if (iter->eof - p < snapshot_hexsz(iter->snapshot) + 2 || @@ -923,22 +923,22 @@ static int next_record(struct packed_ref_iterator *iter) iter->pos, iter->eof - iter->pos); strbuf_add(&iter->refname_buf, p, eol - p); - iter->base.refname = iter->refname_buf.buf; + iter->base.ref.name = iter->refname_buf.buf; if (refname_contains_nul(&iter->refname_buf)) - die("packed refname contains embedded NULL: %s", iter->base.refname); + die("packed refname contains embedded NULL: %s", iter->base.ref.name); - if (check_refname_format(iter->base.refname, REFNAME_ALLOW_ONELEVEL)) { - if (!refname_is_safe(iter->base.refname)) + if (check_refname_format(iter->base.ref.name, REFNAME_ALLOW_ONELEVEL)) { + if (!refname_is_safe(iter->base.ref.name)) die("packed refname is dangerous: %s", - iter->base.refname); + iter->base.ref.name); oidclr(&iter->oid, iter->repo->hash_algo); - iter->base.flags |= REF_BAD_NAME | REF_ISBROKEN; + iter->base.ref.flags |= REF_BAD_NAME | REF_ISBROKEN; } if (iter->snapshot->peeled == PEELED_FULLY || (iter->snapshot->peeled == PEELED_TAGS && - starts_with(iter->base.refname, "refs/tags/"))) - iter->base.flags |= REF_KNOWS_PEELED; + starts_with(iter->base.ref.name, "refs/tags/"))) + iter->base.ref.flags |= REF_KNOWS_PEELED; iter->pos = eol + 1; @@ -956,11 +956,11 @@ static int next_record(struct packed_ref_iterator *iter) * definitely know the value of *this* reference. But * we suppress it if the reference is broken: */ - if ((iter->base.flags & REF_ISBROKEN)) { + if ((iter->base.ref.flags & REF_ISBROKEN)) { oidclr(&iter->peeled, iter->repo->hash_algo); - iter->base.flags &= ~REF_KNOWS_PEELED; + iter->base.ref.flags &= ~REF_KNOWS_PEELED; } else { - iter->base.flags |= REF_KNOWS_PEELED; + iter->base.ref.flags |= REF_KNOWS_PEELED; } } else { oidclr(&iter->peeled, iter->repo->hash_algo); @@ -976,15 +976,15 @@ static int packed_ref_iterator_advance(struct ref_iterator *ref_iterator) int ok; while ((ok = next_record(iter)) == ITER_OK) { - const char *refname = iter->base.refname; + const char *refname = iter->base.ref.name; const char *prefix = iter->prefix; if (iter->flags & DO_FOR_EACH_PER_WORKTREE_ONLY && - !is_per_worktree_ref(iter->base.refname)) + !is_per_worktree_ref(iter->base.ref.name)) continue; if (!(iter->flags & DO_FOR_EACH_INCLUDE_BROKEN) && - !ref_resolves_to_object(iter->base.refname, iter->repo, + !ref_resolves_to_object(iter->base.ref.name, iter->repo, &iter->oid, iter->flags)) continue; @@ -1033,10 +1033,10 @@ static int packed_ref_iterator_peel(struct ref_iterator *ref_iterator, struct packed_ref_iterator *iter = (struct packed_ref_iterator *)ref_iterator; - if ((iter->base.flags & REF_KNOWS_PEELED)) { + if ((iter->base.ref.flags & REF_KNOWS_PEELED)) { oidcpy(peeled, &iter->peeled); return is_null_oid(&iter->peeled) ? -1 : 0; - } else if ((iter->base.flags & (REF_ISBROKEN | REF_ISSYMREF))) { + } else if ((iter->base.ref.flags & (REF_ISBROKEN | REF_ISSYMREF))) { return -1; } else { return peel_object(iter->repo, &iter->oid, peeled) ? -1 : 0; @@ -1194,7 +1194,7 @@ static struct ref_iterator *packed_ref_iterator_begin( iter->snapshot = snapshot; acquire_snapshot(snapshot); strbuf_init(&iter->refname_buf, 0); - iter->base.oid = &iter->oid; + iter->base.ref.oid = &iter->oid; iter->repo = ref_store->repo; iter->flags = flags; @@ -1436,7 +1436,7 @@ static enum ref_transaction_error write_with_updates(struct packed_ref_store *re if (!iter) cmp = +1; else - cmp = strcmp(iter->refname, update->refname); + cmp = strcmp(iter->ref.name, update->refname); } if (!cmp) { @@ -1459,11 +1459,11 @@ static enum ref_transaction_error write_with_updates(struct packed_ref_store *re } goto error; - } else if (!oideq(&update->old_oid, iter->oid)) { + } else if (!oideq(&update->old_oid, iter->ref.oid)) { strbuf_addf(err, "cannot update ref '%s': " "is at %s but expected %s", update->refname, - oid_to_hex(iter->oid), + oid_to_hex(iter->ref.oid), oid_to_hex(&update->old_oid)); ret = REF_TRANSACTION_ERROR_INCORRECT_OLD_VALUE; @@ -1527,8 +1527,8 @@ static enum ref_transaction_error write_with_updates(struct packed_ref_store *re struct object_id peeled; int peel_error = ref_iterator_peel(iter, &peeled); - if (write_packed_entry(out, iter->refname, - iter->oid, + if (write_packed_entry(out, iter->ref.name, + iter->ref.oid, peel_error ? NULL : &peeled)) goto write_error; diff --git a/refs/ref-cache.c b/refs/ref-cache.c index e5e5df16d85e40..f1abc39624166e 100644 --- a/refs/ref-cache.c +++ b/refs/ref-cache.c @@ -425,10 +425,10 @@ static int cache_ref_iterator_advance(struct ref_iterator *ref_iterator) level->prefix_state = entry_prefix_state; level->index = -1; } else { - iter->base.refname = entry->name; - iter->base.referent = entry->u.value.referent; - iter->base.oid = &entry->u.value.oid; - iter->base.flags = entry->flag; + iter->base.ref.name = entry->name; + iter->base.ref.target = entry->u.value.referent; + iter->base.ref.oid = &entry->u.value.oid; + iter->base.ref.flags = entry->flag; return ITER_OK; } } @@ -550,7 +550,7 @@ static int cache_ref_iterator_peel(struct ref_iterator *ref_iterator, { struct cache_ref_iterator *iter = (struct cache_ref_iterator *)ref_iterator; - return peel_object(iter->repo, ref_iterator->oid, peeled) ? -1 : 0; + return peel_object(iter->repo, ref_iterator->ref.oid, peeled) ? -1 : 0; } static void cache_ref_iterator_release(struct ref_iterator *ref_iterator) diff --git a/refs/refs-internal.h b/refs/refs-internal.h index 4ef3bd75c6ae55..ed749d16572dac 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -249,10 +249,7 @@ const char *find_descendant_ref(const char *dirname, */ struct ref_iterator { struct ref_iterator_vtable *vtable; - const char *refname; - const char *referent; - const struct object_id *oid; - unsigned int flags; + struct reference ref; }; /* diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index d4b792862024fc..0e47986cb5b699 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -704,10 +704,10 @@ static int reftable_ref_iterator_advance(struct ref_iterator *ref_iterator) &iter->oid, flags)) continue; - iter->base.refname = iter->ref.refname; - iter->base.referent = referent; - iter->base.oid = &iter->oid; - iter->base.flags = flags; + iter->base.ref.name = iter->ref.refname; + iter->base.ref.target = referent; + iter->base.ref.oid = &iter->oid; + iter->base.ref.flags = flags; break; } @@ -828,7 +828,7 @@ static struct reftable_ref_iterator *ref_iterator_for_stack(struct reftable_ref_ iter = xcalloc(1, sizeof(*iter)); base_ref_iterator_init(&iter->base, &reftable_ref_iterator_vtable); - iter->base.oid = &iter->oid; + iter->base.ref.oid = &iter->oid; iter->flags = flags; iter->refs = refs; iter->exclude_patterns = filter_exclude_patterns(exclude_patterns); @@ -2072,7 +2072,7 @@ static int reftable_reflog_iterator_advance(struct ref_iterator *ref_iterator) strbuf_reset(&iter->last_name); strbuf_addstr(&iter->last_name, iter->log.refname); - iter->base.refname = iter->log.refname; + iter->base.ref.name = iter->log.refname; break; } From 4cea0422879f6a64c0f7ad0ddac6d43897a53e94 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:12 +0200 Subject: [PATCH 026/553] refs: fully reset `struct ref_iterator::ref` on iteration With the introduction of the `struct ref_iterator::ref` field it now is a whole lot easier to introduce new fields that become accessible to the caller without having to adapt every single callsite. But there's a downside: when a new field is introduced we always have to adapt all backends to set that field. This isn't something we can avoid in the general case: when the new field is expected to be populated by all backends we of course cannot avoid doing so. But new fields may be entirely optional, in which case we'd still have such churn. And furthermore, it is very easy right now to leak state from a previous iteration into the next iteration. Address this issue by ensuring that the reference backends all fully reset the field on every single iteration. This ensures that no state from previous iterations can leak into the next one. And it ensures that any newly introduced fields will be zeroed out by default. Note that we don't have to explicitly adapt the "files" backend, as it uses the `cache_ref_iterator` internally. Furthermore, other "wrapping" iterators like for example the `prefix_ref_iterator` copy around the whole reference, so these don't need to be adapted either. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 3 ++- refs/ref-cache.c | 1 + refs/reftable-backend.c | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 7987acdc96a14b..711e07f8326c6b 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -882,6 +882,7 @@ static int next_record(struct packed_ref_iterator *iter) { const char *p, *eol; + memset(&iter->base.ref, 0, sizeof(iter->base.ref)); strbuf_reset(&iter->refname_buf); /* @@ -916,6 +917,7 @@ static int next_record(struct packed_ref_iterator *iter) !isspace(*p++)) die_invalid_line(iter->snapshot->refs->path, iter->pos, iter->eof - iter->pos); + iter->base.ref.oid = &iter->oid; eol = memchr(p, '\n', iter->eof - p); if (!eol) @@ -1194,7 +1196,6 @@ static struct ref_iterator *packed_ref_iterator_begin( iter->snapshot = snapshot; acquire_snapshot(snapshot); strbuf_init(&iter->refname_buf, 0); - iter->base.ref.oid = &iter->oid; iter->repo = ref_store->repo; iter->flags = flags; diff --git a/refs/ref-cache.c b/refs/ref-cache.c index f1abc39624166e..e427848879d61b 100644 --- a/refs/ref-cache.c +++ b/refs/ref-cache.c @@ -425,6 +425,7 @@ static int cache_ref_iterator_advance(struct ref_iterator *ref_iterator) level->prefix_state = entry_prefix_state; level->index = -1; } else { + memset(&iter->base.ref, 0, sizeof(iter->base.ref)); iter->base.ref.name = entry->name; iter->base.ref.target = entry->u.value.referent; iter->base.ref.oid = &entry->u.value.oid; diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 0e47986cb5b699..728886eafd33bd 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -704,6 +704,7 @@ static int reftable_ref_iterator_advance(struct ref_iterator *ref_iterator) &iter->oid, flags)) continue; + memset(&iter->base.ref, 0, sizeof(iter->base.ref)); iter->base.ref.name = iter->ref.refname; iter->base.ref.target = referent; iter->base.ref.oid = &iter->oid; From eb2934d94b43388642ce4840994800310a6d3456 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:13 +0200 Subject: [PATCH 027/553] refs: refactor reference status flags The reference flags encode information like whether or not a reference is a symbolic reference or whether it may be broken. This information is stored in a `int flags` bitfield, which is in conflict with our modern best practices; we tend to use an unsigned integer to store flags. Change the type of the field to be `unsigned`. While at it, refactor the individual flags to be part of an `enum` instead of using preprocessor defines. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.h | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/refs.h b/refs.h index 68d235438c2b32..4f0a685714fa3a 100644 --- a/refs.h +++ b/refs.h @@ -333,27 +333,28 @@ struct ref_transaction; * stored in ref_iterator::flags. Other bits are for internal use * only: */ +enum reference_status { + /* Reference is a symbolic reference. */ + REF_ISSYMREF = (1 << 0), -/* Reference is a symbolic reference. */ -#define REF_ISSYMREF 0x01 + /* Reference is a packed reference. */ + REF_ISPACKED = (1 << 1), -/* Reference is a packed reference. */ -#define REF_ISPACKED 0x02 - -/* - * Reference cannot be resolved to an object name: dangling symbolic - * reference (directly or indirectly), corrupt reference file, - * reference exists but name is bad, or symbolic reference refers to - * ill-formatted reference name. - */ -#define REF_ISBROKEN 0x04 + /* + * Reference cannot be resolved to an object name: dangling symbolic + * reference (directly or indirectly), corrupt reference file, + * reference exists but name is bad, or symbolic reference refers to + * ill-formatted reference name. + */ + REF_ISBROKEN = (1 << 2), -/* - * Reference name is not well formed. - * - * See git-check-ref-format(1) for the definition of well formed ref names. - */ -#define REF_BAD_NAME 0x08 + /* + * Reference name is not well formed. + * + * See git-check-ref-format(1) for the definition of well formed ref names. + */ + REF_BAD_NAME = (1 << 3), +}; /* A reference passed to `for_each_ref()`-style callbacks. */ struct reference { @@ -370,8 +371,8 @@ struct reference { */ const struct object_id *oid; - /* A bitfield of `REF_` flags. */ - int flags; + /* A bitfield of `enum reference_status` flags. */ + unsigned flags; }; /* From f89866163704528f1a6570e134853dbb99120e7c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:14 +0200 Subject: [PATCH 028/553] refs: expose peeled object ID via the iterator Both the "files" and "reftable" backend are able to store peeled values for tags in the respective formats. This allows for a more efficient lookup of the target object of such a tag without having to manually peel via the object database. The infrastructure to access these peeled object IDs is somewhat funky though. When iterating through objects, we store a pointer reference to the current iterator in a global variable. The callbacks invoked by that iterator are then expected to call `peel_iterated_oid()`, which checks whether the globally-stored iterator's current reference refers to the one handed into that function. If so, we ask the iterator to peel the object, otherwise we manually peel the object via the object database. Depending on global state like this is somewhat weird and also quite fragile. Introduce a new `struct reference::peeled_oid` field that can be populated by the reference backends. This field can be accessed via a new function `reference_get_peeled_oid()` that either uses that value, if set, or alternatively peels via the ODB. With this change we don't have to rely on global state anymore, but make the peeled object ID available to the callback functions directly. Adjust trivial callers that already have a `struct reference` available. Remaining callers will be adjusted in subsequent commits. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/describe.c | 2 +- builtin/gc.c | 2 +- builtin/pack-objects.c | 7 ++++--- commit-graph.c | 2 +- ls-refs.c | 2 +- midx-write.c | 2 +- pseudo-merge.c | 2 +- refs.c | 12 ++++++++++++ refs.h | 19 +++++++++++++++++++ refs/packed-backend.c | 1 + refs/reftable-backend.c | 5 +++++ repack-midx.c | 2 +- 12 files changed, 48 insertions(+), 10 deletions(-) diff --git a/builtin/describe.c b/builtin/describe.c index 79545350443c6c..443546aaac96f0 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -208,7 +208,7 @@ static int get_name(const struct reference *ref, void *cb_data UNUSED) } /* Is it annotated? */ - if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) { + if (!reference_get_peeled_oid(the_repository, ref, &peeled)) { is_annotated = !oideq(ref->oid, &peeled); } else { oidcpy(&peeled, ref->oid); diff --git a/builtin/gc.c b/builtin/gc.c index 9de5de175f6a40..f0cf20d42389fe 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1109,7 +1109,7 @@ static int dfs_on_ref(const struct reference *ref, void *cb_data) struct commit_list *stack = NULL; struct commit *commit; - if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) + if (!reference_get_peeled_oid(the_repository, ref, &peeled)) maybe_peeled = &peeled; if (odb_read_object_info(the_repository->objects, maybe_peeled, NULL) != OBJ_COMMIT) return 0; diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 39633a0158e095..1613fecb669830 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -838,7 +838,7 @@ static int mark_tagged(const struct reference *ref, void *cb_data UNUSED) if (entry) entry->tagged = 1; - if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) { + if (!reference_get_peeled_oid(the_repository, ref, &peeled)) { entry = packlist_find(&to_pack, &peeled); if (entry) entry->tagged = 1; @@ -3309,7 +3309,8 @@ static int add_ref_tag(const struct reference *ref, void *cb_data UNUSED) { struct object_id peeled; - if (!peel_iterated_oid(the_repository, ref->oid, &peeled) && obj_is_packed(&peeled)) + if (!reference_get_peeled_oid(the_repository, ref, &peeled) && + obj_is_packed(&peeled)) add_tag_chain(ref->oid); return 0; } @@ -4537,7 +4538,7 @@ static int mark_bitmap_preferred_tip(const struct reference *ref, void *data UNU struct object_id peeled; struct object *object; - if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) + if (!reference_get_peeled_oid(the_repository, ref, &peeled)) maybe_peeled = &peeled; object = parse_object_or_die(the_repository, maybe_peeled, ref->name); diff --git a/commit-graph.c b/commit-graph.c index f91af416259c84..80be2ff2c39842 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1857,7 +1857,7 @@ static int add_ref_to_set(const struct reference *ref, void *cb_data) struct object_id peeled; struct refs_cb_data *data = (struct refs_cb_data *)cb_data; - if (!peel_iterated_oid(data->repo, ref->oid, &peeled)) + if (!reference_get_peeled_oid(data->repo, ref, &peeled)) maybe_peeled = &peeled; if (odb_read_object_info(data->repo->objects, maybe_peeled, NULL) == OBJ_COMMIT) oidset_insert(data->commits, maybe_peeled); diff --git a/ls-refs.c b/ls-refs.c index 64d02723691466..8641281b86c55a 100644 --- a/ls-refs.c +++ b/ls-refs.c @@ -110,7 +110,7 @@ static int send_ref(const struct reference *ref, void *cb_data) if (data->peel && ref->oid) { struct object_id peeled; - if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) + if (!reference_get_peeled_oid(the_repository, ref, &peeled)) strbuf_addf(&data->buf, " peeled:%s", oid_to_hex(&peeled)); } diff --git a/midx-write.c b/midx-write.c index f4dd875747a4b6..23e61cb0001428 100644 --- a/midx-write.c +++ b/midx-write.c @@ -709,7 +709,7 @@ static int add_ref_to_pending(const struct reference *ref, void *cb_data) return 0; } - if (!peel_iterated_oid(revs->repo, ref->oid, &peeled)) + if (!reference_get_peeled_oid(revs->repo, ref, &peeled)) maybe_peeled = &peeled; object = parse_object_or_die(revs->repo, maybe_peeled, ref->name); diff --git a/pseudo-merge.c b/pseudo-merge.c index 0abd51b42c185a..a2d5bd85f95ebf 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -230,7 +230,7 @@ static int find_pseudo_merge_group_for_ref(const struct reference *ref, void *_d uint32_t i; int has_bitmap; - if (!peel_iterated_oid(the_repository, ref->oid, &peeled)) + if (!reference_get_peeled_oid(the_repository, ref, &peeled)) maybe_peeled = &peeled; c = lookup_commit(the_repository, maybe_peeled); diff --git a/refs.c b/refs.c index f96cf43b128a27..1b1551f9814394 100644 --- a/refs.c +++ b/refs.c @@ -2334,6 +2334,18 @@ int peel_iterated_oid(struct repository *r, const struct object_id *base, struct return peel_object(r, base, peeled) ? -1 : 0; } +int reference_get_peeled_oid(struct repository *repo, + const struct reference *ref, + struct object_id *peeled_oid) +{ + if (ref->peeled_oid) { + oidcpy(peeled_oid, ref->peeled_oid); + return 0; + } + + return peel_object(repo, ref->oid, peeled_oid) ? -1 : 0; +} + int refs_update_symref(struct ref_store *refs, const char *ref, const char *target, const char *logmsg) { diff --git a/refs.h b/refs.h index 4f0a685714fa3a..886ed2c0f43b04 100644 --- a/refs.h +++ b/refs.h @@ -371,10 +371,29 @@ struct reference { */ const struct object_id *oid; + /* + * An optional peeled object ID. This field _may_ be set for tags in + * case the peeled value is present in the backend. Please refer to + * `reference_get_peeled_oid()`. + */ + const struct object_id *peeled_oid; + /* A bitfield of `enum reference_status` flags. */ unsigned flags; }; +/* + * Peel the tag to a non-tag commit. If present, this uses the peeled object ID + * exposed by the reference backend. Otherwise, the object is peeled via the + * object database, which is less efficient. + * + * Return `0` if the reference could be peeled, a negative error code + * otherwise. + */ +int reference_get_peeled_oid(struct repository *repo, + const struct reference *ref, + struct object_id *peeled_oid); + /* * The signature for the callback function for the for_each_*() * functions below. The memory pointed to by the `struct reference` diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 711e07f8326c6b..1fefefd54ed0e7 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -963,6 +963,7 @@ static int next_record(struct packed_ref_iterator *iter) iter->base.ref.flags &= ~REF_KNOWS_PEELED; } else { iter->base.ref.flags |= REF_KNOWS_PEELED; + iter->base.ref.peeled_oid = &iter->peeled; } } else { oidclr(&iter->peeled, iter->repo->hash_algo); diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 728886eafd33bd..e214e120d77a5c 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -547,6 +547,7 @@ struct reftable_ref_iterator { struct reftable_iterator iter; struct reftable_ref_record ref; struct object_id oid; + struct object_id peeled_oid; char *prefix; size_t prefix_len; @@ -671,6 +672,8 @@ static int reftable_ref_iterator_advance(struct ref_iterator *ref_iterator) case REFTABLE_REF_VAL2: oidread(&iter->oid, iter->ref.value.val2.value, refs->base.repo->hash_algo); + oidread(&iter->peeled_oid, iter->ref.value.val2.target_value, + refs->base.repo->hash_algo); break; case REFTABLE_REF_SYMREF: referent = refs_resolve_ref_unsafe(&iter->refs->base, @@ -708,6 +711,8 @@ static int reftable_ref_iterator_advance(struct ref_iterator *ref_iterator) iter->base.ref.name = iter->ref.refname; iter->base.ref.target = referent; iter->base.ref.oid = &iter->oid; + if (iter->ref.value_type == REFTABLE_REF_VAL2) + iter->base.ref.peeled_oid = &iter->peeled_oid; iter->base.ref.flags = flags; break; diff --git a/repack-midx.c b/repack-midx.c index 349f7e20b53f25..74bdfa3a6e913f 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -22,7 +22,7 @@ static int midx_snapshot_ref_one(const struct reference *ref, void *_data) const struct object_id *maybe_peeled = ref->oid; struct object_id peeled; - if (!peel_iterated_oid(data->repo, ref->oid, &peeled)) + if (!reference_get_peeled_oid(data->repo, ref, &peeled)) maybe_peeled = &peeled; if (oidset_insert(&data->seen, maybe_peeled)) From adecd5f0b6fdd40219d5503fdaf46aa8d36a4ff7 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:15 +0200 Subject: [PATCH 029/553] upload-pack: convert to use `reference_get_peeled_oid()` The `write_v0_ref()` callback is invoked from two callsites: - Once via `send_ref()` which is a callback passed to `for_each_namespaced_ref_1()` and `refs_head_ref_namespaced()`. - Once manually to announce capabilities. When sending references to the client we also send the peeled value of tags. As we don't have a `struct reference` available in the second case, we cannot easily peel by calling `reference_get_peeled_oid()`, but we instead have to depend on on global state via `peel_iterated_oid()`. We do have a reference available though in the first case, it's only the second case that keeps us from using `reference_get_peeled_oid()`. But that second case only announces capabilities anyway, so we're not really handling a reference at all here. Adapt that case to construct a reference manually and pass that to `write_v0_ref()`. Start to use `reference_get_peeled_oid()` now that we always have a `struct reference` available. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- upload-pack.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index 0d563ae74e92be..2d2b70cbf2dd0b 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -1249,15 +1249,15 @@ static void format_session_id(struct strbuf *buf, struct upload_pack_data *d) { } static void write_v0_ref(struct upload_pack_data *data, - const char *refname, const char *refname_nons, - const struct object_id *oid) + const struct reference *ref, + const char *refname_nons) { static const char *capabilities = "multi_ack thin-pack side-band" " side-band-64k ofs-delta shallow deepen-since deepen-not" " deepen-relative no-progress include-tag multi_ack_detailed"; struct object_id peeled; - if (mark_our_ref(refname_nons, refname, oid, &data->hidden_refs)) + if (mark_our_ref(refname_nons, ref->name, ref->oid, &data->hidden_refs)) return; if (capabilities) { @@ -1267,7 +1267,7 @@ static void write_v0_ref(struct upload_pack_data *data, format_symref_info(&symref_info, &data->symref); format_session_id(&session_id, data); packet_fwrite_fmt(stdout, "%s %s%c%s%s%s%s%s%s%s object-format=%s agent=%s\n", - oid_to_hex(oid), refname_nons, + oid_to_hex(ref->oid), refname_nons, 0, capabilities, (data->allow_uor & ALLOW_TIP_SHA1) ? " allow-tip-sha1-in-want" : "", @@ -1283,17 +1283,17 @@ static void write_v0_ref(struct upload_pack_data *data, strbuf_release(&session_id); data->sent_capabilities = 1; } else { - packet_fwrite_fmt(stdout, "%s %s\n", oid_to_hex(oid), refname_nons); + packet_fwrite_fmt(stdout, "%s %s\n", oid_to_hex(ref->oid), refname_nons); } capabilities = NULL; - if (!peel_iterated_oid(the_repository, oid, &peeled)) + if (!reference_get_peeled_oid(the_repository, ref, &peeled)) packet_fwrite_fmt(stdout, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons); return; } static int send_ref(const struct reference *ref, void *cb_data) { - write_v0_ref(cb_data, ref->name, strip_namespace(ref->name), ref->oid); + write_v0_ref(cb_data, ref, strip_namespace(ref->name)); return 0; } @@ -1442,8 +1442,12 @@ void upload_pack(const int advertise_refs, const int stateless_rpc, send_ref, &data); for_each_namespaced_ref_1(send_ref, &data); if (!data.sent_capabilities) { - const char *refname = "capabilities^{}"; - write_v0_ref(&data, refname, refname, null_oid(the_hash_algo)); + struct reference ref = { + .name = "capabilities^{}", + .oid = null_oid(the_hash_algo), + }; + + write_v0_ref(&data, &ref, ref.name); } /* * fflush stdout before calling advertise_shallow_grafts because send_ref From 70b783c3a194746d8b747677615f33b94454146f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:16 +0200 Subject: [PATCH 030/553] ref-filter: propagate peeled object ID When queueing a reference in the "ref-filter" subsystem we end up creating a new ref array item that contains the reference's info. One bit of info that we always discard though is the peeled object ID, and because of that we are forced to use `peel_iterated_oid()`. Refactor the code to propagate the peeled object ID via the ref array, if available. This allows us to manually peel tags without having to go through the object database. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/ls-remote.c | 2 +- builtin/tag.c | 2 +- builtin/verify-tag.c | 2 +- ref-filter.c | 66 +++++++++++++++++++++++++------------------- ref-filter.h | 5 +++- 5 files changed, 45 insertions(+), 32 deletions(-) diff --git a/builtin/ls-remote.c b/builtin/ls-remote.c index df09000b30de50..fe77829557f252 100644 --- a/builtin/ls-remote.c +++ b/builtin/ls-remote.c @@ -156,7 +156,7 @@ int cmd_ls_remote(int argc, continue; if (!tail_match(&pattern, ref->name)) continue; - item = ref_array_push(&ref_array, ref->name, &ref->old_oid); + item = ref_array_push(&ref_array, ref->name, &ref->old_oid, NULL); item->symref = xstrdup_or_null(ref->symref); } diff --git a/builtin/tag.c b/builtin/tag.c index f0665af3acdf6c..01eba90c5c7bb2 100644 --- a/builtin/tag.c +++ b/builtin/tag.c @@ -153,7 +153,7 @@ static int verify_tag(const char *name, const char *ref UNUSED, return -1; if (format->format) - pretty_print_ref(name, oid, format); + pretty_print_ref(name, oid, NULL, format); return 0; } diff --git a/builtin/verify-tag.c b/builtin/verify-tag.c index cd6bc11095d01a..558121eaa1688e 100644 --- a/builtin/verify-tag.c +++ b/builtin/verify-tag.c @@ -67,7 +67,7 @@ int cmd_verify_tag(int argc, } if (format.format) - pretty_print_ref(name, &oid, &format); + pretty_print_ref(name, &oid, NULL, &format); } return had_error; } diff --git a/ref-filter.c b/ref-filter.c index 6837fa60a9b2b5..7fd8babec8f5bd 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2578,8 +2578,15 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err) * If it is a tag object, see if we use the peeled value. If we do, * grab the peeled OID. */ - if (need_tagged && peel_iterated_oid(the_repository, &obj->oid, &oi_deref.oid)) - die("bad tag"); + if (need_tagged) { + if (!is_null_oid(&ref->peeled_oid)) { + oidcpy(&oi_deref.oid, &ref->peeled_oid); + } else if (!peel_object(the_repository, &obj->oid, &oi_deref.oid)) { + /* We managed to peel the object ourselves. */ + } else { + die("bad tag"); + } + } return get_object(ref, 1, &obj, &oi_deref, err); } @@ -2807,12 +2814,15 @@ static int match_points_at(struct oid_array *points_at, * Callers can then fill in other struct members at their leisure. */ static struct ref_array_item *new_ref_array_item(const char *refname, - const struct object_id *oid) + const struct object_id *oid, + const struct object_id *peeled_oid) { struct ref_array_item *ref; FLEX_ALLOC_STR(ref, refname, refname); oidcpy(&ref->objectname, oid); + if (peeled_oid) + oidcpy(&ref->peeled_oid, peeled_oid); ref->rest = NULL; return ref; @@ -2826,9 +2836,10 @@ static void ref_array_append(struct ref_array *array, struct ref_array_item *ref struct ref_array_item *ref_array_push(struct ref_array *array, const char *refname, - const struct object_id *oid) + const struct object_id *oid, + const struct object_id *peeled_oid) { - struct ref_array_item *ref = new_ref_array_item(refname, oid); + struct ref_array_item *ref = new_ref_array_item(refname, oid, peeled_oid); ref_array_append(array, ref); return ref; } @@ -2871,25 +2882,25 @@ static int filter_ref_kind(struct ref_filter *filter, const char *refname) return ref_kind_from_refname(refname); } -static struct ref_array_item *apply_ref_filter(const char *refname, const char *referent, const struct object_id *oid, - int flag, struct ref_filter *filter) +static struct ref_array_item *apply_ref_filter(const struct reference *ref, + struct ref_filter *filter) { - struct ref_array_item *ref; + struct ref_array_item *item; struct commit *commit = NULL; unsigned int kind; - if (flag & REF_BAD_NAME) { - warning(_("ignoring ref with broken name %s"), refname); + if (ref->flags & REF_BAD_NAME) { + warning(_("ignoring ref with broken name %s"), ref->name); return NULL; } - if (flag & REF_ISBROKEN) { - warning(_("ignoring broken ref %s"), refname); + if (ref->flags & REF_ISBROKEN) { + warning(_("ignoring broken ref %s"), ref->name); return NULL; } /* Obtain the current ref kind from filter_ref_kind() and ignore unwanted refs. */ - kind = filter_ref_kind(filter, refname); + kind = filter_ref_kind(filter, ref->name); /* * Generally HEAD refs are printed with special description denoting a rebase, @@ -2902,13 +2913,13 @@ static struct ref_array_item *apply_ref_filter(const char *refname, const char * else if (!(kind & filter->kind)) return NULL; - if (!filter_pattern_match(filter, refname)) + if (!filter_pattern_match(filter, ref->name)) return NULL; - if (filter_exclude_match(filter, refname)) + if (filter_exclude_match(filter, ref->name)) return NULL; - if (filter->points_at.nr && !match_points_at(&filter->points_at, oid, refname)) + if (filter->points_at.nr && !match_points_at(&filter->points_at, ref->oid, ref->name)) return NULL; /* @@ -2918,7 +2929,7 @@ static struct ref_array_item *apply_ref_filter(const char *refname, const char * */ if (filter->reachable_from || filter->unreachable_from || filter->with_commit || filter->no_commit || filter->verbose) { - commit = lookup_commit_reference_gently(the_repository, oid, 1); + commit = lookup_commit_reference_gently(the_repository, ref->oid, 1); if (!commit) return NULL; /* We perform the filtering for the '--contains' option... */ @@ -2936,13 +2947,13 @@ static struct ref_array_item *apply_ref_filter(const char *refname, const char * * to do its job and the resulting list may yet to be pruned * by maxcount logic. */ - ref = new_ref_array_item(refname, oid); - ref->commit = commit; - ref->flag = flag; - ref->kind = kind; - ref->symref = xstrdup_or_null(referent); + item = new_ref_array_item(ref->name, ref->oid, ref->peeled_oid); + item->commit = commit; + item->flag = ref->flags; + item->kind = kind; + item->symref = xstrdup_or_null(ref->target); - return ref; + return item; } struct ref_filter_cbdata { @@ -2959,8 +2970,7 @@ static int filter_one(const struct reference *ref, void *cb_data) struct ref_filter_cbdata *ref_cbdata = cb_data; struct ref_array_item *item; - item = apply_ref_filter(ref->name, ref->target, ref->oid, - ref->flags, ref_cbdata->filter); + item = apply_ref_filter(ref, ref_cbdata->filter); if (item) ref_array_append(ref_cbdata->array, item); @@ -2997,8 +3007,7 @@ static int filter_and_format_one(const struct reference *ref, void *cb_data) struct ref_array_item *item; struct strbuf output = STRBUF_INIT, err = STRBUF_INIT; - item = apply_ref_filter(ref->name, ref->target, ref->oid, - ref->flags, ref_cbdata->filter); + item = apply_ref_filter(ref, ref_cbdata->filter); if (!item) return 0; @@ -3585,13 +3594,14 @@ void print_formatted_ref_array(struct ref_array *array, struct ref_format *forma } void pretty_print_ref(const char *name, const struct object_id *oid, + const struct object_id *peeled_oid, struct ref_format *format) { struct ref_array_item *ref_item; struct strbuf output = STRBUF_INIT; struct strbuf err = STRBUF_INIT; - ref_item = new_ref_array_item(name, oid); + ref_item = new_ref_array_item(name, oid, peeled_oid); ref_item->kind = ref_kind_from_refname(name); if (format_ref_array_item(ref_item, format, &output, &err)) die("%s", err.buf); diff --git a/ref-filter.h b/ref-filter.h index 235c60f79c9a0f..120221b47fa30d 100644 --- a/ref-filter.h +++ b/ref-filter.h @@ -41,6 +41,7 @@ enum ref_sorting_order { struct ref_array_item { struct object_id objectname; + struct object_id peeled_oid; const char *rest; int flag; unsigned int kind; @@ -187,6 +188,7 @@ void print_formatted_ref_array(struct ref_array *array, struct ref_format *forma * name must be a fully qualified refname. */ void pretty_print_ref(const char *name, const struct object_id *oid, + const struct object_id *peeled_oid, struct ref_format *format); /* @@ -195,7 +197,8 @@ void pretty_print_ref(const char *name, const struct object_id *oid, */ struct ref_array_item *ref_array_push(struct ref_array *array, const char *refname, - const struct object_id *oid); + const struct object_id *oid, + const struct object_id *peeled_oid); /* * If the provided format includes ahead-behind atoms, then compute the From feaaea4c123e6b94ebbdc2135278946ee9cc8eed Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:17 +0200 Subject: [PATCH 031/553] builtin/show-ref: convert to use `reference_get_peeled_oid()` The git-show-ref(1) command has multiple different modes: - It knows to show all references matching a pattern. - It knows to list all references that are an exact match to whatever the user has provided. - It knows to check for reference existence. The first two commands use mostly the same infrastructure to print the references via `show_one()`. But while the former mode uses a proper iterator and thus has a `struct reference` available in its context, the latter calls `refs_read_ref()` and thus doesn't. Consequently, we cannot easily use `reference_get_peeled_oid()` to print the peeled value. Adapt the code so that we manually construct a `struct reference` when verifying refs. We wouldn't ever have the peeled value available anyway as we're not using an iterator here, so we can simply plug in the values we _do_ have. With this change we now have a `struct reference` available at both callsites of `show_one()` and can thus pass it, which allows us to use `reference_get_peeled_oid()` instead of `peel_iterated_oid()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/show-ref.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/builtin/show-ref.c b/builtin/show-ref.c index 4803b5e59865f6..4d4984e4e0c244 100644 --- a/builtin/show-ref.c +++ b/builtin/show-ref.c @@ -31,31 +31,31 @@ struct show_one_options { }; static void show_one(const struct show_one_options *opts, - const char *refname, const struct object_id *oid) + const struct reference *ref) { const char *hex; struct object_id peeled; - if (!odb_has_object(the_repository->objects, oid, + if (!odb_has_object(the_repository->objects, ref->oid, HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) - die("git show-ref: bad ref %s (%s)", refname, - oid_to_hex(oid)); + die("git show-ref: bad ref %s (%s)", ref->name, + oid_to_hex(ref->oid)); if (opts->quiet) return; - hex = repo_find_unique_abbrev(the_repository, oid, opts->abbrev); + hex = repo_find_unique_abbrev(the_repository, ref->oid, opts->abbrev); if (opts->hash_only) printf("%s\n", hex); else - printf("%s %s\n", hex, refname); + printf("%s %s\n", hex, ref->name); if (!opts->deref_tags) return; - if (!peel_iterated_oid(the_repository, oid, &peeled)) { + if (!reference_get_peeled_oid(the_repository, ref, &peeled)) { hex = repo_find_unique_abbrev(the_repository, &peeled, opts->abbrev); - printf("%s %s^{}\n", hex, refname); + printf("%s %s^{}\n", hex, ref->name); } } @@ -93,7 +93,7 @@ static int show_ref(const struct reference *ref, void *cbdata) match: data->found_match++; - show_one(data->show_one_opts, ref->name, ref->oid); + show_one(data->show_one_opts, ref); return 0; } @@ -175,12 +175,18 @@ static int cmd_show_ref__verify(const struct show_one_options *show_one_opts, if ((starts_with(*refs, "refs/") || refname_is_safe(*refs)) && !refs_read_ref(get_main_ref_store(the_repository), *refs, &oid)) { - show_one(show_one_opts, *refs, &oid); - } - else if (!show_one_opts->quiet) + struct reference ref = { + .name = *refs, + .oid = &oid, + }; + + show_one(show_one_opts, &ref); + } else if (!show_one_opts->quiet) { die("'%s' - not a valid ref", *refs); - else + } else { return 1; + } + refs++; } From 5a5c7359f77ecd1bc4b0e172563161d602f131d3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:18 +0200 Subject: [PATCH 032/553] refs: drop `current_ref_iter` hack In preceding commits we have refactored all callers of `peel_iterated_oid()` to instead use `reference_get_peeled_oid()`. This allows us to thus get rid of the former function. Getting rid of that function is nice, but even nicer is that this also allows us to get rid of the `current_ref_iter` hack. This global variable tracked the currently-active ref iterator so that we can use it to peel an object ID. Now that the peeled object ID is propagated via `struct reference` though we don't have to depend on this hack anymore, which makes for a more robust and easier-to-understand infrastructure. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 10 ---------- refs/iterator.c | 5 ----- refs/refs-internal.h | 13 ------------- 3 files changed, 28 deletions(-) diff --git a/refs.c b/refs.c index 1b1551f9814394..9d8f0a9ca4a3a6 100644 --- a/refs.c +++ b/refs.c @@ -2324,16 +2324,6 @@ int refs_optimize(struct ref_store *refs, struct pack_refs_opts *opts) return refs->be->optimize(refs, opts); } -int peel_iterated_oid(struct repository *r, const struct object_id *base, struct object_id *peeled) -{ - if (current_ref_iter && - (current_ref_iter->ref.oid == base || - oideq(current_ref_iter->ref.oid, base))) - return ref_iterator_peel(current_ref_iter, peeled); - - return peel_object(r, base, peeled) ? -1 : 0; -} - int reference_get_peeled_oid(struct repository *repo, const struct reference *ref, struct object_id *peeled_oid) diff --git a/refs/iterator.c b/refs/iterator.c index fe5980e1b6c96b..072c6aacdb0341 100644 --- a/refs/iterator.c +++ b/refs/iterator.c @@ -458,15 +458,11 @@ struct ref_iterator *prefix_ref_iterator_begin(struct ref_iterator *iter0, return ref_iterator; } -struct ref_iterator *current_ref_iter = NULL; - int do_for_each_ref_iterator(struct ref_iterator *iter, each_ref_fn fn, void *cb_data) { int retval = 0, ok; - struct ref_iterator *old_ref_iter = current_ref_iter; - current_ref_iter = iter; while ((ok = ref_iterator_advance(iter)) == ITER_OK) { retval = fn(&iter->ref, cb_data); if (retval) @@ -474,7 +470,6 @@ int do_for_each_ref_iterator(struct ref_iterator *iter, } out: - current_ref_iter = old_ref_iter; if (ok == ITER_ERROR) retval = -1; ref_iterator_free(iter); diff --git a/refs/refs-internal.h b/refs/refs-internal.h index ed749d16572dac..f4f845bbeaf673 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -376,19 +376,6 @@ struct ref_iterator_vtable { ref_iterator_release_fn *release; }; -/* - * current_ref_iter is a performance hack: when iterating over - * references using the for_each_ref*() functions, current_ref_iter is - * set to the reference iterator before calling the callback function. - * If the callback function calls peel_ref(), then peel_ref() first - * checks whether the reference to be peeled is the one referred to by - * the iterator (it usually is) and if so, asks the iterator for the - * peeled version of the reference if it is available. This avoids a - * refname lookup in a common case. current_ref_iter is set to NULL - * when the iteration is over. - */ -extern struct ref_iterator *current_ref_iter; - struct ref_store; /* refs backends */ From 705114772e0a0741c3288329bd9ac4e11e38db9a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:19 +0200 Subject: [PATCH 033/553] refs: drop infrastructure to peel via iterators Now that the peeled object ID gets propagated via the `struct reference` there is no need anymore to call into the reference iterator itself to dereference an object. Remove this infrastructure. Most of the changes are straight-forward deletions of code. There is one exception though in `refs/packed-backend.c::write_with_updates()`. Here we stop peeling the iterator and instead just pass the peeled object ID of that iterator directly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.h | 14 -------------- refs/debug.c | 11 ----------- refs/files-backend.c | 17 ----------------- refs/iterator.c | 36 ------------------------------------ refs/packed-backend.c | 24 +----------------------- refs/ref-cache.c | 9 --------- refs/refs-internal.h | 7 ------- refs/reftable-backend.c | 24 ------------------------ 8 files changed, 1 insertion(+), 141 deletions(-) diff --git a/refs.h b/refs.h index 886ed2c0f43b04..2dd7ac1a16aee9 100644 --- a/refs.h +++ b/refs.h @@ -1289,10 +1289,6 @@ int repo_migrate_ref_storage_format(struct repository *repo, * to the next entry, ref_iterator_advance() aborts the iteration, * frees the ref_iterator, and returns ITER_ERROR. * - * The reference currently being looked at can be peeled by calling - * ref_iterator_peel(). This function is often faster than peel_ref(), - * so it should be preferred when iterating over references. - * * Putting it all together, a typical iteration looks like this: * * int ok; @@ -1307,9 +1303,6 @@ int repo_migrate_ref_storage_format(struct repository *repo, * // Access information about the current reference: * if (!(iter->flags & REF_ISSYMREF)) * printf("%s is %s\n", iter->refname, oid_to_hex(iter->oid)); - * - * // If you need to peel the reference: - * ref_iterator_peel(iter, &oid); * } * * if (ok != ITER_DONE) @@ -1400,13 +1393,6 @@ enum ref_iterator_seek_flag { int ref_iterator_seek(struct ref_iterator *ref_iterator, const char *refname, unsigned int flags); -/* - * If possible, peel the reference currently being viewed by the - * iterator. Return 0 on success. - */ -int ref_iterator_peel(struct ref_iterator *ref_iterator, - struct object_id *peeled); - /* Free the reference iterator and any associated resources. */ void ref_iterator_free(struct ref_iterator *ref_iterator); diff --git a/refs/debug.c b/refs/debug.c index 67718bd1f49f1f..01499b9033ca3c 100644 --- a/refs/debug.c +++ b/refs/debug.c @@ -177,16 +177,6 @@ static int debug_ref_iterator_seek(struct ref_iterator *ref_iterator, return res; } -static int debug_ref_iterator_peel(struct ref_iterator *ref_iterator, - struct object_id *peeled) -{ - struct debug_ref_iterator *diter = - (struct debug_ref_iterator *)ref_iterator; - int res = diter->iter->vtable->peel(diter->iter, peeled); - trace_printf_key(&trace_refs, "iterator_peel: %s: %d\n", diter->iter->ref.name, res); - return res; -} - static void debug_ref_iterator_release(struct ref_iterator *ref_iterator) { struct debug_ref_iterator *diter = @@ -198,7 +188,6 @@ static void debug_ref_iterator_release(struct ref_iterator *ref_iterator) static struct ref_iterator_vtable debug_ref_iterator_vtable = { .advance = debug_ref_iterator_advance, .seek = debug_ref_iterator_seek, - .peel = debug_ref_iterator_peel, .release = debug_ref_iterator_release, }; diff --git a/refs/files-backend.c b/refs/files-backend.c index fac53fa052dd22..5aeb454fb47684 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -993,15 +993,6 @@ static int files_ref_iterator_seek(struct ref_iterator *ref_iterator, return ref_iterator_seek(iter->iter0, refname, flags); } -static int files_ref_iterator_peel(struct ref_iterator *ref_iterator, - struct object_id *peeled) -{ - struct files_ref_iterator *iter = - (struct files_ref_iterator *)ref_iterator; - - return ref_iterator_peel(iter->iter0, peeled); -} - static void files_ref_iterator_release(struct ref_iterator *ref_iterator) { struct files_ref_iterator *iter = @@ -1012,7 +1003,6 @@ static void files_ref_iterator_release(struct ref_iterator *ref_iterator) static struct ref_iterator_vtable files_ref_iterator_vtable = { .advance = files_ref_iterator_advance, .seek = files_ref_iterator_seek, - .peel = files_ref_iterator_peel, .release = files_ref_iterator_release, }; @@ -2388,12 +2378,6 @@ static int files_reflog_iterator_seek(struct ref_iterator *ref_iterator UNUSED, BUG("ref_iterator_seek() called for reflog_iterator"); } -static int files_reflog_iterator_peel(struct ref_iterator *ref_iterator UNUSED, - struct object_id *peeled UNUSED) -{ - BUG("ref_iterator_peel() called for reflog_iterator"); -} - static void files_reflog_iterator_release(struct ref_iterator *ref_iterator) { struct files_reflog_iterator *iter = @@ -2404,7 +2388,6 @@ static void files_reflog_iterator_release(struct ref_iterator *ref_iterator) static struct ref_iterator_vtable files_reflog_iterator_vtable = { .advance = files_reflog_iterator_advance, .seek = files_reflog_iterator_seek, - .peel = files_reflog_iterator_peel, .release = files_reflog_iterator_release, }; diff --git a/refs/iterator.c b/refs/iterator.c index 072c6aacdb0341..d79aa5ec82dc6f 100644 --- a/refs/iterator.c +++ b/refs/iterator.c @@ -21,12 +21,6 @@ int ref_iterator_seek(struct ref_iterator *ref_iterator, const char *refname, return ref_iterator->vtable->seek(ref_iterator, refname, flags); } -int ref_iterator_peel(struct ref_iterator *ref_iterator, - struct object_id *peeled) -{ - return ref_iterator->vtable->peel(ref_iterator, peeled); -} - void ref_iterator_free(struct ref_iterator *ref_iterator) { if (ref_iterator) { @@ -60,12 +54,6 @@ static int empty_ref_iterator_seek(struct ref_iterator *ref_iterator UNUSED, return 0; } -static int empty_ref_iterator_peel(struct ref_iterator *ref_iterator UNUSED, - struct object_id *peeled UNUSED) -{ - BUG("peel called for empty iterator"); -} - static void empty_ref_iterator_release(struct ref_iterator *ref_iterator UNUSED) { } @@ -73,7 +61,6 @@ static void empty_ref_iterator_release(struct ref_iterator *ref_iterator UNUSED) static struct ref_iterator_vtable empty_ref_iterator_vtable = { .advance = empty_ref_iterator_advance, .seek = empty_ref_iterator_seek, - .peel = empty_ref_iterator_peel, .release = empty_ref_iterator_release, }; @@ -240,18 +227,6 @@ static int merge_ref_iterator_seek(struct ref_iterator *ref_iterator, return 0; } -static int merge_ref_iterator_peel(struct ref_iterator *ref_iterator, - struct object_id *peeled) -{ - struct merge_ref_iterator *iter = - (struct merge_ref_iterator *)ref_iterator; - - if (!iter->current) { - BUG("peel called before advance for merge iterator"); - } - return ref_iterator_peel(*iter->current, peeled); -} - static void merge_ref_iterator_release(struct ref_iterator *ref_iterator) { struct merge_ref_iterator *iter = @@ -263,7 +238,6 @@ static void merge_ref_iterator_release(struct ref_iterator *ref_iterator) static struct ref_iterator_vtable merge_ref_iterator_vtable = { .advance = merge_ref_iterator_advance, .seek = merge_ref_iterator_seek, - .peel = merge_ref_iterator_peel, .release = merge_ref_iterator_release, }; @@ -412,15 +386,6 @@ static int prefix_ref_iterator_seek(struct ref_iterator *ref_iterator, return ref_iterator_seek(iter->iter0, refname, flags); } -static int prefix_ref_iterator_peel(struct ref_iterator *ref_iterator, - struct object_id *peeled) -{ - struct prefix_ref_iterator *iter = - (struct prefix_ref_iterator *)ref_iterator; - - return ref_iterator_peel(iter->iter0, peeled); -} - static void prefix_ref_iterator_release(struct ref_iterator *ref_iterator) { struct prefix_ref_iterator *iter = @@ -432,7 +397,6 @@ static void prefix_ref_iterator_release(struct ref_iterator *ref_iterator) static struct ref_iterator_vtable prefix_ref_iterator_vtable = { .advance = prefix_ref_iterator_advance, .seek = prefix_ref_iterator_seek, - .peel = prefix_ref_iterator_peel, .release = prefix_ref_iterator_release, }; diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 1fefefd54ed0e7..6fa229edd0ffad 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1030,22 +1030,6 @@ static int packed_ref_iterator_seek(struct ref_iterator *ref_iterator, return 0; } -static int packed_ref_iterator_peel(struct ref_iterator *ref_iterator, - struct object_id *peeled) -{ - struct packed_ref_iterator *iter = - (struct packed_ref_iterator *)ref_iterator; - - if ((iter->base.ref.flags & REF_KNOWS_PEELED)) { - oidcpy(peeled, &iter->peeled); - return is_null_oid(&iter->peeled) ? -1 : 0; - } else if ((iter->base.ref.flags & (REF_ISBROKEN | REF_ISSYMREF))) { - return -1; - } else { - return peel_object(iter->repo, &iter->oid, peeled) ? -1 : 0; - } -} - static void packed_ref_iterator_release(struct ref_iterator *ref_iterator) { struct packed_ref_iterator *iter = @@ -1059,7 +1043,6 @@ static void packed_ref_iterator_release(struct ref_iterator *ref_iterator) static struct ref_iterator_vtable packed_ref_iterator_vtable = { .advance = packed_ref_iterator_advance, .seek = packed_ref_iterator_seek, - .peel = packed_ref_iterator_peel, .release = packed_ref_iterator_release, }; @@ -1525,13 +1508,8 @@ static enum ref_transaction_error write_with_updates(struct packed_ref_store *re if (cmp < 0) { /* Pass the old reference through. */ - - struct object_id peeled; - int peel_error = ref_iterator_peel(iter, &peeled); - if (write_packed_entry(out, iter->ref.name, - iter->ref.oid, - peel_error ? NULL : &peeled)) + iter->ref.oid, iter->ref.peeled_oid)) goto write_error; if ((ok = ref_iterator_advance(iter)) != ITER_OK) { diff --git a/refs/ref-cache.c b/refs/ref-cache.c index e427848879d61b..ffef01a597579e 100644 --- a/refs/ref-cache.c +++ b/refs/ref-cache.c @@ -546,14 +546,6 @@ static int cache_ref_iterator_seek(struct ref_iterator *ref_iterator, return 0; } -static int cache_ref_iterator_peel(struct ref_iterator *ref_iterator, - struct object_id *peeled) -{ - struct cache_ref_iterator *iter = - (struct cache_ref_iterator *)ref_iterator; - return peel_object(iter->repo, ref_iterator->ref.oid, peeled) ? -1 : 0; -} - static void cache_ref_iterator_release(struct ref_iterator *ref_iterator) { struct cache_ref_iterator *iter = @@ -565,7 +557,6 @@ static void cache_ref_iterator_release(struct ref_iterator *ref_iterator) static struct ref_iterator_vtable cache_ref_iterator_vtable = { .advance = cache_ref_iterator_advance, .seek = cache_ref_iterator_seek, - .peel = cache_ref_iterator_peel, .release = cache_ref_iterator_release, }; diff --git a/refs/refs-internal.h b/refs/refs-internal.h index f4f845bbeaf673..4671517dade968 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -357,12 +357,6 @@ typedef int ref_iterator_advance_fn(struct ref_iterator *ref_iterator); typedef int ref_iterator_seek_fn(struct ref_iterator *ref_iterator, const char *refname, unsigned int flags); -/* - * Peels the current ref, returning 0 for success or -1 for failure. - */ -typedef int ref_iterator_peel_fn(struct ref_iterator *ref_iterator, - struct object_id *peeled); - /* * Implementations of this function should free any resources specific * to the derived class. @@ -372,7 +366,6 @@ typedef void ref_iterator_release_fn(struct ref_iterator *ref_iterator); struct ref_iterator_vtable { ref_iterator_advance_fn *advance; ref_iterator_seek_fn *seek; - ref_iterator_peel_fn *peel; ref_iterator_release_fn *release; }; diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index e214e120d77a5c..e329d4a423abdb 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -744,21 +744,6 @@ static int reftable_ref_iterator_seek(struct ref_iterator *ref_iterator, return iter->err; } -static int reftable_ref_iterator_peel(struct ref_iterator *ref_iterator, - struct object_id *peeled) -{ - struct reftable_ref_iterator *iter = - (struct reftable_ref_iterator *)ref_iterator; - - if (iter->ref.value_type == REFTABLE_REF_VAL2) { - oidread(peeled, iter->ref.value.val2.target_value, - iter->refs->base.repo->hash_algo); - return 0; - } - - return -1; -} - static void reftable_ref_iterator_release(struct ref_iterator *ref_iterator) { struct reftable_ref_iterator *iter = @@ -776,7 +761,6 @@ static void reftable_ref_iterator_release(struct ref_iterator *ref_iterator) static struct ref_iterator_vtable reftable_ref_iterator_vtable = { .advance = reftable_ref_iterator_advance, .seek = reftable_ref_iterator_seek, - .peel = reftable_ref_iterator_peel, .release = reftable_ref_iterator_release, }; @@ -2098,13 +2082,6 @@ static int reftable_reflog_iterator_seek(struct ref_iterator *ref_iterator UNUSE return -1; } -static int reftable_reflog_iterator_peel(struct ref_iterator *ref_iterator UNUSED, - struct object_id *peeled UNUSED) -{ - BUG("reftable reflog iterator cannot be peeled"); - return -1; -} - static void reftable_reflog_iterator_release(struct ref_iterator *ref_iterator) { struct reftable_reflog_iterator *iter = @@ -2117,7 +2094,6 @@ static void reftable_reflog_iterator_release(struct ref_iterator *ref_iterator) static struct ref_iterator_vtable reftable_reflog_iterator_vtable = { .advance = reftable_reflog_iterator_advance, .seek = reftable_reflog_iterator_seek, - .peel = reftable_reflog_iterator_peel, .release = reftable_reflog_iterator_release, }; From 7ec85185b197ce1cd28721a6f4415fb9db5cd42f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:20 +0200 Subject: [PATCH 034/553] object: add flag to `peel_object()` to verify object type When peeling a tag to a non-tag object we repeatedly call `parse_object()` on the tagged object until we find the first object that isn't a tag. While this feels sensible at first, there is a big catch here: `parse_object()` doesn't actually verify the type of the tagged object. The relevant code path here eventually ends up in `parse_tag_buffer()`. Here, we parse the various fields of the tag, including the "type". Once we've figured out the type and the tagged object ID, we call one of the `lookup_${type}()` functions for whatever type we have found. There is two possible outcomes in the successful case: 1. The object is already part of our cached objects. In that case we double-check whether the type we're trying to look up matches the type that was cached. 2. The object is _not_ part of our cached objects. In that case, we simply create a new object with the expected type, but we don't parse that object. In the first case we might notice type mismatches, but only in the case where our cache has the object with the correct type. In the second case, we'll blindly assume that the type is correct and then go with it. We'll only notice that the type might be wrong when we try to parse the object at a later point. Now arguably, we could change `parse_tag_buffer()` to verify the tagged object's type for us. But that would have the effect that such a tag cannot be parsed at all anymore, and we have a small bunch of tests for exactly this case that assert we still can open such tags. So this change does not feel like something we can retroactively tighten, even though one shouldn't ever hit such corrupted tags. Instead, add a new `flags` field to `peel_object()` that allows the caller to opt in to strict object verification. This will be wired up at a subset of callsites over the next few commits. Note that this change also inlines `deref_tag_noverify()`. There's only been two callsites of that function, the one we're changing and one in our test helpers. The latter callsite can trivially use `deref_tag()` instead, so by inlining the function we avoid having to pass down the flag. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object.c | 20 +++++++++++++++++--- object.h | 15 ++++++++++++++- ref-filter.c | 2 +- refs.c | 2 +- refs/packed-backend.c | 5 ++--- refs/reftable-backend.c | 4 ++-- t/helper/test-reach.c | 2 +- tag.c | 12 ------------ tag.h | 1 - 9 files changed, 38 insertions(+), 25 deletions(-) diff --git a/object.c b/object.c index 986114a6dba843..e72b0ed4360e67 100644 --- a/object.c +++ b/object.c @@ -209,11 +209,12 @@ struct object *lookup_object_by_type(struct repository *r, enum peel_status peel_object(struct repository *r, const struct object_id *name, - struct object_id *oid) + struct object_id *oid, + unsigned flags) { struct object *o = lookup_unknown_object(r, name); - if (o->type == OBJ_NONE) { + if (o->type == OBJ_NONE || flags & PEEL_OBJECT_VERIFY_OBJECT_TYPE) { int type = odb_read_object_info(r->objects, name, NULL); if (type < 0 || !object_as_type(o, type, 0)) return PEEL_INVALID; @@ -222,7 +223,20 @@ enum peel_status peel_object(struct repository *r, if (o->type != OBJ_TAG) return PEEL_NON_TAG; - o = deref_tag_noverify(r, o); + while (o && o->type == OBJ_TAG) { + o = parse_object(r, &o->oid); + if (o && o->type == OBJ_TAG && ((struct tag *)o)->tagged) { + o = ((struct tag *)o)->tagged; + + if (flags & PEEL_OBJECT_VERIFY_OBJECT_TYPE) { + int type = odb_read_object_info(r->objects, &o->oid, NULL); + if (type < 0 || !object_as_type(o, type, 0)) + return PEEL_INVALID; + } + } else { + o = NULL; + } + } if (!o) return PEEL_INVALID; diff --git a/object.h b/object.h index 8c3c1c46e1bf04..1499f63d507c32 100644 --- a/object.h +++ b/object.h @@ -287,6 +287,17 @@ enum peel_status { PEEL_BROKEN = -4 }; +enum peel_object_flags { + /* + * Always verify the object type, even in the case where the looked-up + * object already has an object type. This can be useful when the + * stored object type may be invalid. One such case is when looking up + * objects via tags, where we blindly trust the object type declared by + * the tag. + */ + PEEL_OBJECT_VERIFY_OBJECT_TYPE = (1 << 0), +}; + /* * Peel the named object; i.e., if the object is a tag, resolve the * tag recursively until a non-tag is found. If successful, store the @@ -295,7 +306,9 @@ enum peel_status { * and leave oid unchanged. */ enum peel_status peel_object(struct repository *r, - const struct object_id *name, struct object_id *oid); + const struct object_id *name, + struct object_id *oid, + unsigned flags); struct object_list *object_list_insert(struct object *item, struct object_list **list_p); diff --git a/ref-filter.c b/ref-filter.c index 7fd8babec8f5bd..9a8ed8c8fc1f3b 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2581,7 +2581,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err) if (need_tagged) { if (!is_null_oid(&ref->peeled_oid)) { oidcpy(&oi_deref.oid, &ref->peeled_oid); - } else if (!peel_object(the_repository, &obj->oid, &oi_deref.oid)) { + } else if (!peel_object(the_repository, &oi.oid, &oi_deref.oid, 0)) { /* We managed to peel the object ourselves. */ } else { die("bad tag"); diff --git a/refs.c b/refs.c index 9d8f0a9ca4a3a6..a41a94ae55bb43 100644 --- a/refs.c +++ b/refs.c @@ -2333,7 +2333,7 @@ int reference_get_peeled_oid(struct repository *repo, return 0; } - return peel_object(repo, ref->oid, peeled_oid) ? -1 : 0; + return peel_object(repo, ref->oid, peeled_oid, 0) ? -1 : 0; } int refs_update_symref(struct ref_store *refs, const char *ref, diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 6fa229edd0ffad..4752d3f3981fe3 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1527,9 +1527,8 @@ static enum ref_transaction_error write_with_updates(struct packed_ref_store *re i++; } else { struct object_id peeled; - int peel_error = peel_object(refs->base.repo, - &update->new_oid, - &peeled); + int peel_error = peel_object(refs->base.repo, &update->new_oid, + &peeled, 0); if (write_packed_entry(out, update->refname, &update->new_oid, diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index e329d4a423abdb..9febb2322c3b24 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1632,7 +1632,7 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data ref.refname = (char *)u->refname; ref.update_index = ts; - peel_error = peel_object(arg->refs->base.repo, &u->new_oid, &peeled); + peel_error = peel_object(arg->refs->base.repo, &u->new_oid, &peeled, 0); if (!peel_error) { ref.value_type = REFTABLE_REF_VAL2; memcpy(ref.value.val2.target_value, peeled.hash, GIT_MAX_RAWSZ); @@ -2497,7 +2497,7 @@ static int write_reflog_expiry_table(struct reftable_writer *writer, void *cb_da ref.refname = (char *)arg->refname; ref.update_index = ts; - if (!peel_object(arg->refs->base.repo, &arg->update_oid, &peeled)) { + if (!peel_object(arg->refs->base.repo, &arg->update_oid, &peeled, 0)) { ref.value_type = REFTABLE_REF_VAL2; memcpy(ref.value.val2.target_value, peeled.hash, GIT_MAX_RAWSZ); memcpy(ref.value.val2.value, arg->update_oid.hash, GIT_MAX_RAWSZ); diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c index 028ec003067828..c58c93800f3232 100644 --- a/t/helper/test-reach.c +++ b/t/helper/test-reach.c @@ -63,7 +63,7 @@ int cmd__reach(int ac, const char **av) die("failed to resolve %s", buf.buf + 2); orig = parse_object(r, &oid); - peeled = deref_tag_noverify(the_repository, orig); + peeled = deref_tag(the_repository, orig, NULL, 0); if (!peeled) die("failed to load commit for input %s resulting in oid %s", diff --git a/tag.c b/tag.c index 1d52686ee105f2..f5c232d2f1f36c 100644 --- a/tag.c +++ b/tag.c @@ -94,18 +94,6 @@ struct object *deref_tag(struct repository *r, struct object *o, const char *war return o; } -struct object *deref_tag_noverify(struct repository *r, struct object *o) -{ - while (o && o->type == OBJ_TAG) { - o = parse_object(r, &o->oid); - if (o && o->type == OBJ_TAG && ((struct tag *)o)->tagged) - o = ((struct tag *)o)->tagged; - else - o = NULL; - } - return o; -} - struct tag *lookup_tag(struct repository *r, const struct object_id *oid) { struct object *obj = lookup_object(r, oid); diff --git a/tag.h b/tag.h index c49d7c19ad3c90..ef12a610372063 100644 --- a/tag.h +++ b/tag.h @@ -16,7 +16,6 @@ int parse_tag_buffer(struct repository *r, struct tag *item, const void *data, u int parse_tag(struct tag *item); void release_tag_memory(struct tag *t); struct object *deref_tag(struct repository *r, struct object *, const char *, int); -struct object *deref_tag_noverify(struct repository *r, struct object *); int gpg_verify_tag(const struct object_id *oid, const char *name_to_report, unsigned flags); struct object_id *get_tagged_oid(struct tag *tag); From 6ec4c0b45ba916770c6fbc03df94018ca06fb77e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:21 +0200 Subject: [PATCH 035/553] refs: don't store peeled object IDs for invalid tags Both the "files" and "reftable" backend store peeled object IDs for references that point to tags: - The "files" backend stores the value when packing refs, where each peeled object ID is prefixed with "^". - The "reftable" backend stores the value whenever writing a new reference that points to a tag via a special ref record type. Both of these backends use `peel_object()` to find the peeled object ID. But as explained in the preceding commit, that function does not detect the case where the tag's tagged object and its claimed type mismatch. The consequence of storing these bogus peeled object IDs is that we're less likely to detect such corruption in other parts of Git. git-for-each-ref(1) for example does not notice anymore that the tag is broken when using "--format=%(*objectname)" to dereference tags. One could claim that this is good, because it still allows us to mostly use the tag as intended. But the biggest problem here is that we now have different behaviour for such a broken tag depending on whether or not we have its peeled value in the refdb. Fix the issue by verifying the object type when peeling the object. If that verification fails we simply skip storing the peeled value in either of the reference formats. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 2 +- refs/reftable-backend.c | 3 ++- t/pack-refs-tests.sh | 32 ++++++++++++++++++++++++++++++++ t/t0610-reftable-basics.sh | 28 ++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 2 deletions(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 4752d3f3981fe3..1ab0c503930164 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1528,7 +1528,7 @@ static enum ref_transaction_error write_with_updates(struct packed_ref_store *re } else { struct object_id peeled; int peel_error = peel_object(refs->base.repo, &update->new_oid, - &peeled, 0); + &peeled, PEEL_OBJECT_VERIFY_OBJECT_TYPE); if (write_packed_entry(out, update->refname, &update->new_oid, diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 9febb2322c3b24..6bbfd5618dac16 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1632,7 +1632,8 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data ref.refname = (char *)u->refname; ref.update_index = ts; - peel_error = peel_object(arg->refs->base.repo, &u->new_oid, &peeled, 0); + peel_error = peel_object(arg->refs->base.repo, &u->new_oid, &peeled, + PEEL_OBJECT_VERIFY_OBJECT_TYPE); if (!peel_error) { ref.value_type = REFTABLE_REF_VAL2; memcpy(ref.value.val2.target_value, peeled.hash, GIT_MAX_RAWSZ); diff --git a/t/pack-refs-tests.sh b/t/pack-refs-tests.sh index 3dbcc01718e157..095823d915fd63 100644 --- a/t/pack-refs-tests.sh +++ b/t/pack-refs-tests.sh @@ -428,4 +428,36 @@ do ' done +test_expect_success 'pack-refs does not store invalid peeled tag value' ' + test_when_finished rm -rf repo && + git init repo && + ( + cd repo && + git commit --allow-empty --message initial && + + echo garbage >blob-content && + blob_id=$(git hash-object -w -t blob blob-content) && + + # Write an invalid tag into the object database. The tag itself + # is well-formed, but the tagged object is a blob while we + # claim that it is a commit. + cat >tag-content <<-EOF && + object $blob_id + type commit + tag bad-tag + tagger C O Mitter 1112354055 +0200 + + annotated + EOF + tag_id=$(git hash-object -w -t tag tag-content) && + git update-ref refs/tags/bad-tag "$tag_id" && + + # The packed-refs file should not contain the peeled object ID. + # If it did this would cause commands that use the peeled value + # to not notice this corrupted tag. + git pack-refs --all && + test_grep ! "^\^" .git/packed-refs + ) +' + test_done diff --git a/t/t0610-reftable-basics.sh b/t/t0610-reftable-basics.sh index 3ea5d51532a8b8..6575528f212716 100755 --- a/t/t0610-reftable-basics.sh +++ b/t/t0610-reftable-basics.sh @@ -1135,4 +1135,32 @@ test_expect_success 'fetch: accessing FETCH_HEAD special ref works' ' test_cmp expect actual ' +test_expect_success 'writes do not persist peeled value for invalid tags' ' + test_when_finished rm -rf repo && + git init repo && + ( + cd repo && + git commit --allow-empty --message initial && + + # We cannot easily verify that the peeled value is not stored + # in the tables. Instead, we test this indirectly: we create + # two tags that both point to the same object, but they claim + # different object types. If we parse both tags we notice that + # the parsed tagged object has a mismatch between the two tags + # and bail out. + # + # If we instead use the persisted peeled value we would not + # even parse the tags. As such, we would not notice the + # discrepancy either and thus listing these tags would succeed. + git tag tag-1 -m "tag 1" && + git cat-file tag tag-1 >raw-tag && + sed "s/^type commit$/type blob/" broken-tag && + broken_tag_id=$(git hash-object -w -t tag broken-tag) && + git update-ref refs/tags/tag-2 $broken_tag_id && + + test_must_fail git for-each-ref --format="%(*objectname)" refs/tags/ 2>err && + test_grep "bad tag pointer" err + ) +' + test_done From e66077ae45813a5ca269a7d676310a243bdcc1c2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:22 +0200 Subject: [PATCH 036/553] ref-filter: detect broken tags when dereferencing them Users can ask git-for-each-ref(1) to peel tags and return information of the tagged object by adding an asterisk to the format, like for example "%(*$objectname)". If so, git-for-each-ref(1) peels that object to the first non-tag object and then returns its values. As mentioned in preceding commits, it can happen that the tagged object type and the claimed object type differ, effectively resulting in a corrupt tag. git-for-each-ref(1) would notice this mismatch, print an error and then bail out when trying to peel the tag. But we only notice this corruption in some very specific edge cases! While we have a test in "t/for-each-ref-tests.sh" that verifies the above scenario, this test is specifically crafted to detect the issue at hand. Namely, we create two tags: - One tag points to a specific object with the correct type. - The other tag points to the *same* object with a different type. The fact that both tags point to the same object is important here: `peel_object()` wouldn't notice the corruption if the tagged objects were different. The root cause is that `peel_object()` calls `lookup_${type}()` eventually, where the type is the same type declared in the tag object. Consequently, when we have two tags pointing to the same object but with different declared types we'll call two different lookup functions. The first lookup will store the object with an unverified type A, whereas the second lookup will try to look up the object with a different unverified type B. And it is only now that we notice the discrepancy in object types, even though type A could've already been the wrong type. Fix the issue by verifying the object type in `populate_value()`. With this change we'll also notice type mismatches when only dereferencing a tag once. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- ref-filter.c | 3 ++- t/for-each-ref-tests.sh | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/ref-filter.c b/ref-filter.c index 9a8ed8c8fc1f3b..c54025d6b4c4cd 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2581,7 +2581,8 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err) if (need_tagged) { if (!is_null_oid(&ref->peeled_oid)) { oidcpy(&oi_deref.oid, &ref->peeled_oid); - } else if (!peel_object(the_repository, &oi.oid, &oi_deref.oid, 0)) { + } else if (!peel_object(the_repository, &oi.oid, &oi_deref.oid, + PEEL_OBJECT_VERIFY_OBJECT_TYPE)) { /* We managed to peel the object ourselves. */ } else { die("bad tag"); diff --git a/t/for-each-ref-tests.sh b/t/for-each-ref-tests.sh index e3ad19298accde..4593be5fd544a8 100644 --- a/t/for-each-ref-tests.sh +++ b/t/for-each-ref-tests.sh @@ -1809,7 +1809,9 @@ test_expect_success "${git_for_each_ref} reports broken tags" ' bad=$(git hash-object -w -t tag bad) && git update-ref refs/tags/broken-tag-bad $bad && test_must_fail ${git_for_each_ref} --format="%(*objectname)" \ - refs/tags/broken-tag-* + refs/tags/broken-tag-* && + test_must_fail ${git_for_each_ref} --format="%(*objectname)" \ + refs/tags/broken-tag-bad ' test_expect_success 'set up tag with signature and no blank lines' ' From a29e2e8fe7e3935e23d2a03dc429cc9c2e68bfbe Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 23 Oct 2025 09:16:23 +0200 Subject: [PATCH 037/553] ref-filter: parse objects on demand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When formatting an arbitrary object we parse that object regardless of whether or not we actually need any parsed data. In fact, many of the atoms we have don't require any. Refactor the code so that we parse the data on demand when we see an atom that wants to access the objects. This leads to a small speedup, for example in the Chromium repository with around 40000 refs: Benchmark 1: for-each-ref --format='%(raw)' (HEAD~) Time (mean ± σ): 388.7 ms ± 1.1 ms [User: 322.2 ms, System: 65.0 ms] Range (min … max): 387.3 ms … 390.8 ms 10 runs Benchmark 2: for-each-ref --format='%(raw)' (HEAD) Time (mean ± σ): 344.7 ms ± 0.7 ms [User: 287.8 ms, System: 55.1 ms] Range (min … max): 343.9 ms … 345.7 ms 10 runs Summary for-each-ref --format='%(raw)' (HEAD) ran 1.13 ± 0.00 times faster than for-each-ref --format='%(raw)' (HEAD~) With this change, we now spend ~90% of the time decompressing objects, which is almost as good as it gets regarding git-for-each-ref(1)'s own infrastructure. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- ref-filter.c | 142 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 106 insertions(+), 36 deletions(-) diff --git a/ref-filter.c b/ref-filter.c index c54025d6b4c4cd..7cfcd5c3554930 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -91,6 +91,7 @@ static struct expand_data { struct object_id delta_base_oid; void *content; + struct object *maybe_object; struct object_info info; } oi, oi_deref; @@ -1475,11 +1476,29 @@ static void grab_common_values(struct atom_value *val, int deref, struct expand_ } } +static struct object *get_or_parse_object(struct expand_data *data, const char *refname, + struct strbuf *err, int *eaten) +{ + if (!data->maybe_object) { + data->maybe_object = parse_object_buffer(the_repository, &data->oid, data->type, + data->size, data->content, eaten); + if (!data->maybe_object) { + strbuf_addf(err, _("parse_object_buffer failed on %s for %s"), + oid_to_hex(&data->oid), refname); + return NULL; + } + } + + return data->maybe_object; +} + /* See grab_values */ -static void grab_tag_values(struct atom_value *val, int deref, struct object *obj) +static int grab_tag_values(struct atom_value *val, int deref, + struct expand_data *data, const char *refname, + struct strbuf *err, int *eaten) { + struct tag *tag = NULL; int i; - struct tag *tag = (struct tag *) obj; for (i = 0; i < used_atom_cnt; i++) { const char *name = used_atom[i].name; @@ -1487,6 +1506,14 @@ static void grab_tag_values(struct atom_value *val, int deref, struct object *ob struct atom_value *v = &val[i]; if (!!deref != (*name == '*')) continue; + + if (!tag) { + tag = (struct tag *) get_or_parse_object(data, refname, + err, eaten); + if (!tag) + return -1; + } + if (deref) name++; if (atom_type == ATOM_TAG) @@ -1496,22 +1523,35 @@ static void grab_tag_values(struct atom_value *val, int deref, struct object *ob else if (atom_type == ATOM_OBJECT && tag->tagged) v->s = xstrdup(oid_to_hex(&tag->tagged->oid)); } + + return 0; } /* See grab_values */ -static void grab_commit_values(struct atom_value *val, int deref, struct object *obj) +static int grab_commit_values(struct atom_value *val, int deref, + struct expand_data *data, const char *refname, + struct strbuf *err, int *eaten) { int i; - struct commit *commit = (struct commit *) obj; + struct commit *commit = NULL; for (i = 0; i < used_atom_cnt; i++) { const char *name = used_atom[i].name; enum atom_type atom_type = used_atom[i].atom_type; struct atom_value *v = &val[i]; + if (!!deref != (*name == '*')) continue; if (deref) name++; + + if (!commit) { + commit = (struct commit *) get_or_parse_object(data, refname, + err, eaten); + if (!commit) + return -1; + } + if (atom_type == ATOM_TREE && grab_oid(name, "tree", get_commit_tree_oid(commit), v, &used_atom[i])) continue; @@ -1531,6 +1571,8 @@ static void grab_commit_values(struct atom_value *val, int deref, struct object v->s = strbuf_detach(&s, NULL); } } + + return 0; } static const char *find_wholine(const char *who, int wholen, const char *buf) @@ -1759,10 +1801,12 @@ static void grab_person(const char *who, struct atom_value *val, int deref, void } } -static void grab_signature(struct atom_value *val, int deref, struct object *obj) +static int grab_signature(struct atom_value *val, int deref, + struct expand_data *data, const char *refname, + struct strbuf *err, int *eaten) { int i; - struct commit *commit = (struct commit *) obj; + struct commit *commit = NULL; struct signature_check sigc = { 0 }; int signature_checked = 0; @@ -1790,6 +1834,13 @@ static void grab_signature(struct atom_value *val, int deref, struct object *obj continue; if (!signature_checked) { + if (!commit) { + commit = (struct commit *) get_or_parse_object(data, refname, + err, eaten); + if (!commit) + return -1; + } + check_commit_signature(commit, &sigc); signature_checked = 1; } @@ -1843,6 +1894,8 @@ static void grab_signature(struct atom_value *val, int deref, struct object *obj if (signature_checked) signature_check_clear(&sigc); + + return 0; } static void find_subpos(const char *buf, @@ -1920,9 +1973,8 @@ static void append_lines(struct strbuf *out, const char *buf, unsigned long size } static void grab_describe_values(struct atom_value *val, int deref, - struct object *obj) + struct expand_data *data) { - struct commit *commit = (struct commit *)obj; int i; for (i = 0; i < used_atom_cnt; i++) { @@ -1944,7 +1996,7 @@ static void grab_describe_values(struct atom_value *val, int deref, cmd.git_cmd = 1; strvec_push(&cmd.args, "describe"); strvec_pushv(&cmd.args, atom->u.describe_args.v); - strvec_push(&cmd.args, oid_to_hex(&commit->object.oid)); + strvec_push(&cmd.args, oid_to_hex(&data->oid)); if (pipe_command(&cmd, NULL, 0, &out, 0, &err, 0) < 0) { error(_("failed to run 'describe'")); v->s = xstrdup(""); @@ -2066,24 +2118,36 @@ static void fill_missing_values(struct atom_value *val) * pointed at by the ref itself; otherwise it is the object the * ref (which is a tag) refers to. */ -static void grab_values(struct atom_value *val, int deref, struct object *obj, struct expand_data *data) +static int grab_values(struct atom_value *val, int deref, struct expand_data *data, + const char *refname, struct strbuf *err, int *eaten) { void *buf = data->content; + int ret; - switch (obj->type) { + switch (data->type) { case OBJ_TAG: - grab_tag_values(val, deref, obj); + ret = grab_tag_values(val, deref, data, refname, err, eaten); + if (ret < 0) + goto out; + grab_sub_body_contents(val, deref, data); grab_person("tagger", val, deref, buf); - grab_describe_values(val, deref, obj); + grab_describe_values(val, deref, data); break; case OBJ_COMMIT: - grab_commit_values(val, deref, obj); + ret = grab_commit_values(val, deref, data, refname, err, eaten); + if (ret < 0) + goto out; + grab_sub_body_contents(val, deref, data); grab_person("author", val, deref, buf); grab_person("committer", val, deref, buf); - grab_signature(val, deref, obj); - grab_describe_values(val, deref, obj); + + ret = grab_signature(val, deref, data, refname, err, eaten); + if (ret < 0) + goto out; + + grab_describe_values(val, deref, data); break; case OBJ_TREE: /* grab_tree_values(val, deref, obj, buf, sz); */ @@ -2094,8 +2158,12 @@ static void grab_values(struct atom_value *val, int deref, struct object *obj, s grab_sub_body_contents(val, deref, data); break; default: - die("Eh? Object of type %d?", obj->type); + die("Eh? Object of type %d?", data->type); } + + ret = 0; +out: + return ret; } static inline char *copy_advance(char *dst, const char *src) @@ -2292,38 +2360,41 @@ static const char *get_refname(struct used_atom *atom, struct ref_array_item *re return show_ref(&atom->u.refname, ref->refname); } -static int get_object(struct ref_array_item *ref, int deref, struct object **obj, +static int get_object(struct ref_array_item *ref, int deref, struct expand_data *oi, struct strbuf *err) { - /* parse_object_buffer() will set eaten to 0 if free() will be needed */ - int eaten = 1; + /* parse_object_buffer() will set eaten to 1 if free() will be needed */ + int eaten = 0; + int ret; + if (oi->info.contentp) { /* We need to know that to use parse_object_buffer properly */ oi->info.sizep = &oi->size; oi->info.typep = &oi->type; } + if (odb_read_object_info_extended(the_repository->objects, &oi->oid, &oi->info, - OBJECT_INFO_LOOKUP_REPLACE)) - return strbuf_addf_ret(err, -1, _("missing object %s for %s"), - oid_to_hex(&oi->oid), ref->refname); + OBJECT_INFO_LOOKUP_REPLACE)) { + ret = strbuf_addf_ret(err, -1, _("missing object %s for %s"), + oid_to_hex(&oi->oid), ref->refname); + goto out; + } if (oi->info.disk_sizep && oi->disk_size < 0) BUG("Object size is less than zero."); if (oi->info.contentp) { - *obj = parse_object_buffer(the_repository, &oi->oid, oi->type, oi->size, oi->content, &eaten); - if (!*obj) { - if (!eaten) - free(oi->content); - return strbuf_addf_ret(err, -1, _("parse_object_buffer failed on %s for %s"), - oid_to_hex(&oi->oid), ref->refname); - } - grab_values(ref->value, deref, *obj, oi); + ret = grab_values(ref->value, deref, oi, ref->refname, err, &eaten); + if (ret < 0) + goto out; } grab_common_values(ref->value, deref, oi); + ret = 0; + +out: if (!eaten) free(oi->content); - return 0; + return ret; } static void populate_worktree_map(struct hashmap *map, struct worktree **worktrees) @@ -2376,7 +2447,6 @@ static char *get_worktree_path(const struct ref_array_item *ref) */ static int populate_value(struct ref_array_item *ref, struct strbuf *err) { - struct object *obj; int i; struct object_info empty = OBJECT_INFO_INIT; int ahead_behind_atoms = 0; @@ -2564,14 +2634,14 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err) oi.oid = ref->objectname; - if (get_object(ref, 0, &obj, &oi, err)) + if (get_object(ref, 0, &oi, err)) return -1; /* * If there is no atom that wants to know about tagged * object, we are done. */ - if (!need_tagged || (obj->type != OBJ_TAG)) + if (!need_tagged || (oi.type != OBJ_TAG)) return 0; /* @@ -2589,7 +2659,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err) } } - return get_object(ref, 1, &obj, &oi_deref, err); + return get_object(ref, 1, &oi_deref, err); } /* From bea37f1d647c6b17896eb3f0c210ac8dfc27b6d7 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 4 Nov 2025 15:36:13 +0100 Subject: [PATCH 038/553] ref-filter: fix stale parsed objects In 054f5f457e (ref-filter: parse objects on demand, 2025-10-23) we have started to skip parsing some objects in case we don't need to access their values in the first place. This was done by introducing a new member `struct expand_data::maybe_object` that gets populated on demand via `get_or_parse_object()`. This has led to a regression though where the object now gets reused because we don't reset it properly. The `oi` structure is declared in global scope, and there is no single place where we reset it before invoking `get_object()`. The consequence is that the `maybe_object` member doesn't get reset across calls, so subsequent calls will end up reusing the same object. This is only an issue for a subset of retrieved values, as not all of the infrastructure ends up calling `get_or_parse_object()`. So the effect is limited, which is probably why the issue wasn't detected earlier. Fix the issue by resetting `maybe_object` in `get_object()`. Reported-by: Junio C Hamano Based-on-patch-by: Jeff King Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- ref-filter.c | 2 ++ t/t7004-tag.sh | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/ref-filter.c b/ref-filter.c index 7cfcd5c3554930..d8667c569a18f1 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2367,6 +2367,8 @@ static int get_object(struct ref_array_item *ref, int deref, int eaten = 0; int ret; + oi->maybe_object = NULL; + if (oi->info.contentp) { /* We need to know that to use parse_object_buffer properly */ oi->info.sizep = &oi->size; diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh index 10835631ca7644..d1388cfdf45b02 100755 --- a/t/t7004-tag.sh +++ b/t/t7004-tag.sh @@ -2332,4 +2332,24 @@ test_expect_success 'If tag cannot be created then tag message file is not unlin test_path_exists .git/TAG_EDITMSG ' +test_expect_success 'annotated tag version sort' ' + git tag -a -m "sample 1.0" vsample-1.0 && + git tag -a -m "sample 2.0" vsample-2.0 && + git tag -a -m "sample 10.0" vsample-10.0 && + cat >expect <<-EOF && + vsample-1.0 + vsample-2.0 + vsample-10.0 + EOF + + git tag --list --sort=version:tag vsample-\* >actual && + test_cmp expect actual && + + # Ensure that we also handle this case alright in the case we have the + # peeled values cached e.g. via the packed-refs file. + git pack-refs --all && + git tag --list --sort=version:tag vsample-\* && + test_cmp expect actual +' + test_done From 61ac8ba0f034b61d7353a799fecae9fe45137a72 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 4 Nov 2025 07:28:59 -0800 Subject: [PATCH 039/553] t7004: do not chdir around in the main process Move down to no-contains subdirectory inside a subshell, just like the previous step that created and used it does. Signed-off-by: Junio C Hamano --- t/t7004-tag.sh | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh index d1388cfdf45b02..ce2ff2a28abaa3 100755 --- a/t/t7004-tag.sh +++ b/t/t7004-tag.sh @@ -2293,24 +2293,26 @@ test_expect_success '--contains combined with --no-contains' ' # don't recurse down to tags for trees or blobs pointed to by *those* # commits. test_expect_success 'Does --[no-]contains stop at commits? Yes!' ' - cd no-contains && - blob=$(git rev-parse v0.3:v0.3.t) && - tree=$(git rev-parse v0.3^{tree}) && - git tag tag-blob $blob && - git tag tag-tree $tree && - git tag --contains v0.3 >actual && - cat >expected <<-\EOF && - v0.3 - v0.4 - v0.5 - EOF - test_cmp expected actual && - git tag --no-contains v0.3 >actual && - cat >expected <<-\EOF && - v0.1 - v0.2 - EOF - test_cmp expected actual + ( + cd no-contains && + blob=$(git rev-parse v0.3:v0.3.t) && + tree=$(git rev-parse v0.3^{tree}) && + git tag tag-blob $blob && + git tag tag-tree $tree && + git tag --contains v0.3 >actual && + cat >expected <<-\EOF && + v0.3 + v0.4 + v0.5 + EOF + test_cmp expected actual && + git tag --no-contains v0.3 >actual && + cat >expected <<-\EOF && + v0.1 + v0.2 + EOF + test_cmp expected actual + ) ' test_expect_success 'If tag is created then tag message file is unlinked' ' From 9b93ab8a9c61c53b3b9b2b3ba60c3e5d66b8ff56 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Mon, 20 Oct 2025 10:18:29 +0200 Subject: [PATCH 040/553] refs: move to using the '.optimize' functions The `struct ref_store` variable exposes two ways to optimize a reftable backend: 1. pack_refs 2. optimize The former was specific to the 'files' + 'packed' refs backend. The latter is more generic and covers all backends. While the naming is different, both of these functions perform the same functionality. Consolidate this code to only maintain the 'optimize' functions. Do this by modifying the backends so that they exclusively implement the `optimize` callback, only. All users of the refs subsystem already use the 'optimize' function so there is no changes needed on the callee side. Finally, cleanup all references to the 'pack_refs' field of the structure and code around it. Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- refs.c | 6 ------ refs.h | 6 ------ refs/debug.c | 8 ++++---- refs/files-backend.c | 14 ++------------ refs/packed-backend.c | 6 +++--- refs/refs-internal.h | 3 --- refs/reftable-backend.c | 13 +++---------- 7 files changed, 12 insertions(+), 44 deletions(-) diff --git a/refs.c b/refs.c index a41a94ae55bb43..b9a4a606462d71 100644 --- a/refs.c +++ b/refs.c @@ -2313,12 +2313,6 @@ void base_ref_store_init(struct ref_store *refs, struct repository *repo, refs->gitdir = xstrdup(path); } -/* backend functions */ -int refs_pack_refs(struct ref_store *refs, struct pack_refs_opts *opts) -{ - return refs->be->pack_refs(refs, opts); -} - int refs_optimize(struct ref_store *refs, struct pack_refs_opts *opts) { return refs->be->optimize(refs, opts); diff --git a/refs.h b/refs.h index 2dd7ac1a16aee9..8ff591ea95c7d9 100644 --- a/refs.h +++ b/refs.h @@ -514,12 +514,6 @@ struct pack_refs_opts { struct string_list *includes; }; -/* - * Write a packed-refs file for the current repository. - * flags: Combination of the above PACK_REFS_* flags. - */ -int refs_pack_refs(struct ref_store *refs, struct pack_refs_opts *opts); - /* * Optimize the ref store. The exact behavior is up to the backend. * For the files backend, this is equivalent to packing refs. diff --git a/refs/debug.c b/refs/debug.c index 01499b9033ca3c..40cd1d9c1540c6 100644 --- a/refs/debug.c +++ b/refs/debug.c @@ -116,11 +116,11 @@ static int debug_transaction_abort(struct ref_store *refs, return res; } -static int debug_pack_refs(struct ref_store *ref_store, struct pack_refs_opts *opts) +static int debug_optimize(struct ref_store *ref_store, struct pack_refs_opts *opts) { struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store; - int res = drefs->refs->be->pack_refs(drefs->refs, opts); - trace_printf_key(&trace_refs, "pack_refs: %d\n", res); + int res = drefs->refs->be->optimize(drefs->refs, opts); + trace_printf_key(&trace_refs, "optimize: %d\n", res); return res; } @@ -430,7 +430,7 @@ struct ref_storage_be refs_be_debug = { .transaction_finish = debug_transaction_finish, .transaction_abort = debug_transaction_abort, - .pack_refs = debug_pack_refs, + .optimize = debug_optimize, .rename_ref = debug_rename_ref, .copy_ref = debug_copy_ref, diff --git a/refs/files-backend.c b/refs/files-backend.c index 5aeb454fb47684..d13b87e056cf5e 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -1444,8 +1444,8 @@ static int should_pack_refs(struct files_ref_store *refs, return 0; } -static int files_pack_refs(struct ref_store *ref_store, - struct pack_refs_opts *opts) +static int files_optimize(struct ref_store *ref_store, + struct pack_refs_opts *opts) { struct files_ref_store *refs = files_downcast(ref_store, REF_STORE_WRITE | REF_STORE_ODB, @@ -1512,15 +1512,6 @@ static int files_pack_refs(struct ref_store *ref_store, return 0; } -static int files_optimize(struct ref_store *ref_store, struct pack_refs_opts *opts) -{ - /* - * For the "files" backend, "optimizing" is the same as "packing". - * So, we just call the existing worker function for packing. - */ - return files_pack_refs(ref_store, opts); -} - /* * People using contrib's git-new-workdir have .git/logs/refs -> * /some/other/path/.git/logs/refs, and that may live on another device. @@ -3975,7 +3966,6 @@ struct ref_storage_be refs_be_files = { .transaction_finish = files_transaction_finish, .transaction_abort = files_transaction_abort, - .pack_refs = files_pack_refs, .optimize = files_optimize, .rename_ref = files_rename_ref, .copy_ref = files_copy_ref, diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 1ab0c503930164..20cf9fab18e2e9 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1773,8 +1773,8 @@ static int packed_transaction_finish(struct ref_store *ref_store, return ret; } -static int packed_pack_refs(struct ref_store *ref_store UNUSED, - struct pack_refs_opts *pack_opts UNUSED) +static int packed_optimize(struct ref_store *ref_store UNUSED, + struct pack_refs_opts *pack_opts UNUSED) { /* * Packed refs are already packed. It might be that loose refs @@ -2129,7 +2129,7 @@ struct ref_storage_be refs_be_packed = { .transaction_finish = packed_transaction_finish, .transaction_abort = packed_transaction_abort, - .pack_refs = packed_pack_refs, + .optimize = packed_optimize, .rename_ref = NULL, .copy_ref = NULL, diff --git a/refs/refs-internal.h b/refs/refs-internal.h index 4671517dade968..fc5149df5b3c5c 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -422,8 +422,6 @@ typedef int ref_transaction_commit_fn(struct ref_store *refs, struct ref_transaction *transaction, struct strbuf *err); -typedef int pack_refs_fn(struct ref_store *ref_store, - struct pack_refs_opts *opts); typedef int optimize_fn(struct ref_store *ref_store, struct pack_refs_opts *opts); typedef int rename_ref_fn(struct ref_store *ref_store, @@ -550,7 +548,6 @@ struct ref_storage_be { ref_transaction_finish_fn *transaction_finish; ref_transaction_abort_fn *transaction_abort; - pack_refs_fn *pack_refs; optimize_fn *optimize; rename_ref_fn *rename_ref; copy_ref_fn *copy_ref; diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 6bbfd5618dac16..43cc66a48e9143 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1700,11 +1700,11 @@ static int reftable_be_transaction_finish(struct ref_store *ref_store UNUSED, return ret; } -static int reftable_be_pack_refs(struct ref_store *ref_store, - struct pack_refs_opts *opts) +static int reftable_be_optimize(struct ref_store *ref_store, + struct pack_refs_opts *opts) { struct reftable_ref_store *refs = - reftable_be_downcast(ref_store, REF_STORE_WRITE | REF_STORE_ODB, "pack_refs"); + reftable_be_downcast(ref_store, REF_STORE_WRITE | REF_STORE_ODB, "optimize_refs"); struct reftable_stack *stack; int ret; @@ -1733,12 +1733,6 @@ static int reftable_be_pack_refs(struct ref_store *ref_store, return ret; } -static int reftable_be_optimize(struct ref_store *ref_store, - struct pack_refs_opts *opts) -{ - return reftable_be_pack_refs(ref_store, opts); -} - struct write_create_symref_arg { struct reftable_ref_store *refs; struct reftable_stack *stack; @@ -2761,7 +2755,6 @@ struct ref_storage_be refs_be_reftable = { .transaction_finish = reftable_be_transaction_finish, .transaction_abort = reftable_be_transaction_abort, - .pack_refs = reftable_be_pack_refs, .optimize = reftable_be_optimize, .rename_ref = reftable_be_rename_ref, .copy_ref = reftable_be_copy_ref, From 2cd99d984122f7f1cd7c3b153ee0a0d566831b30 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Mon, 20 Oct 2025 10:18:30 +0200 Subject: [PATCH 041/553] refs: rename 'pack_refs_opts' to 'refs_optimize_opts' The previous commit removed all references to 'pack_refs()' within the refs subsystem. Continue this cleanup by also renaming 'pack_refs_opts' to 'refs_optimize_opts' and the respective flags accordingly. Keeping the naming consistent will make the code easier to maintain. Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- pack-refs.c | 20 ++++++++++---------- refs.c | 2 +- refs.h | 18 +++++++++--------- refs/debug.c | 2 +- refs/files-backend.c | 10 +++++----- refs/packed-backend.c | 2 +- refs/refs-internal.h | 2 +- refs/reftable-backend.c | 4 ++-- 8 files changed, 30 insertions(+), 30 deletions(-) diff --git a/pack-refs.c b/pack-refs.c index 1a5e07d8b888ab..eb6b2ba2c2c2b5 100644 --- a/pack-refs.c +++ b/pack-refs.c @@ -14,10 +14,10 @@ int pack_refs_core(int argc, { struct ref_exclusions excludes = REF_EXCLUSIONS_INIT; struct string_list included_refs = STRING_LIST_INIT_NODUP; - struct pack_refs_opts pack_refs_opts = { + struct refs_optimize_opts optimize_opts = { .exclusions = &excludes, .includes = &included_refs, - .flags = PACK_REFS_PRUNE, + .flags = REFS_OPTIMIZE_PRUNE, }; struct string_list option_excluded_refs = STRING_LIST_INIT_NODUP; struct string_list_item *item; @@ -26,9 +26,9 @@ int pack_refs_core(int argc, struct option opts[] = { OPT_BOOL(0, "all", &pack_all, N_("pack everything")), - OPT_BIT(0, "prune", &pack_refs_opts.flags, N_("prune loose refs (default)"), PACK_REFS_PRUNE), - OPT_BIT(0, "auto", &pack_refs_opts.flags, N_("auto-pack refs as needed"), PACK_REFS_AUTO), - OPT_STRING_LIST(0, "include", pack_refs_opts.includes, N_("pattern"), + OPT_BIT(0, "prune", &optimize_opts.flags, N_("prune loose refs (default)"), REFS_OPTIMIZE_PRUNE), + OPT_BIT(0, "auto", &optimize_opts.flags, N_("auto-pack refs as needed"), REFS_OPTIMIZE_AUTO), + OPT_STRING_LIST(0, "include", optimize_opts.includes, N_("pattern"), N_("references to include")), OPT_STRING_LIST(0, "exclude", &option_excluded_refs, N_("pattern"), N_("references to exclude")), @@ -39,15 +39,15 @@ int pack_refs_core(int argc, usage_with_options(usage_opts, opts); for_each_string_list_item(item, &option_excluded_refs) - add_ref_exclusion(pack_refs_opts.exclusions, item->string); + add_ref_exclusion(optimize_opts.exclusions, item->string); if (pack_all) - string_list_append(pack_refs_opts.includes, "*"); + string_list_append(optimize_opts.includes, "*"); - if (!pack_refs_opts.includes->nr) - string_list_append(pack_refs_opts.includes, "refs/tags/*"); + if (!optimize_opts.includes->nr) + string_list_append(optimize_opts.includes, "refs/tags/*"); - ret = refs_optimize(get_main_ref_store(repo), &pack_refs_opts); + ret = refs_optimize(get_main_ref_store(repo), &optimize_opts); clear_ref_exclusions(&excludes); string_list_clear(&included_refs, 0); diff --git a/refs.c b/refs.c index b9a4a606462d71..0d0831f29ba25b 100644 --- a/refs.c +++ b/refs.c @@ -2313,7 +2313,7 @@ void base_ref_store_init(struct ref_store *refs, struct repository *repo, refs->gitdir = xstrdup(path); } -int refs_optimize(struct ref_store *refs, struct pack_refs_opts *opts) +int refs_optimize(struct ref_store *refs, struct refs_optimize_opts *opts) { return refs->be->optimize(refs, opts); } diff --git a/refs.h b/refs.h index 8ff591ea95c7d9..6b05bba527ffca 100644 --- a/refs.h +++ b/refs.h @@ -499,16 +499,16 @@ void refs_warn_dangling_symrefs(struct ref_store *refs, FILE *fp, const struct string_list *refnames); /* - * Flags for controlling behaviour of pack_refs() - * PACK_REFS_PRUNE: Prune loose refs after packing - * PACK_REFS_AUTO: Pack refs on a best effort basis. The heuristics and end - * result are decided by the ref backend. Backends may ignore - * this flag and fall back to a normal repack. + * Flags for controlling behaviour of refs_optimize() + * REFS_OPTIMIZE_PRUNE: Prune loose refs after packing + * REFS_OPTIMIZE_AUTO: Pack refs on a best effort basis. The heuristics and end + * result are decided by the ref backend. Backends may ignore + * this flag and fall back to a normal repack. */ -#define PACK_REFS_PRUNE (1 << 0) -#define PACK_REFS_AUTO (1 << 1) +#define REFS_OPTIMIZE_PRUNE (1 << 0) +#define REFS_OPTIMIZE_AUTO (1 << 1) -struct pack_refs_opts { +struct refs_optimize_opts { unsigned int flags; struct ref_exclusions *exclusions; struct string_list *includes; @@ -518,7 +518,7 @@ struct pack_refs_opts { * Optimize the ref store. The exact behavior is up to the backend. * For the files backend, this is equivalent to packing refs. */ -int refs_optimize(struct ref_store *refs, struct pack_refs_opts *opts); +int refs_optimize(struct ref_store *refs, struct refs_optimize_opts *opts); /* * Setup reflog before using. Fill in err and return -1 on failure. diff --git a/refs/debug.c b/refs/debug.c index 40cd1d9c1540c6..2defd2d465712e 100644 --- a/refs/debug.c +++ b/refs/debug.c @@ -116,7 +116,7 @@ static int debug_transaction_abort(struct ref_store *refs, return res; } -static int debug_optimize(struct ref_store *ref_store, struct pack_refs_opts *opts) +static int debug_optimize(struct ref_store *ref_store, struct refs_optimize_opts *opts) { struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store; int res = drefs->refs->be->optimize(drefs->refs, opts); diff --git a/refs/files-backend.c b/refs/files-backend.c index d13b87e056cf5e..23bb641f2c84ee 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -1355,7 +1355,7 @@ static void prune_refs(struct files_ref_store *refs, struct ref_to_prune **refs_ */ static int should_pack_ref(struct files_ref_store *refs, const struct reference *ref, - struct pack_refs_opts *opts) + struct refs_optimize_opts *opts) { struct string_list_item *item; @@ -1383,7 +1383,7 @@ static int should_pack_ref(struct files_ref_store *refs, } static int should_pack_refs(struct files_ref_store *refs, - struct pack_refs_opts *opts) + struct refs_optimize_opts *opts) { struct ref_iterator *iter; size_t packed_size; @@ -1391,7 +1391,7 @@ static int should_pack_refs(struct files_ref_store *refs, size_t limit; int ret; - if (!(opts->flags & PACK_REFS_AUTO)) + if (!(opts->flags & REFS_OPTIMIZE_AUTO)) return 1; ret = packed_refs_size(refs->packed_ref_store, &packed_size); @@ -1445,7 +1445,7 @@ static int should_pack_refs(struct files_ref_store *refs, } static int files_optimize(struct ref_store *ref_store, - struct pack_refs_opts *opts) + struct refs_optimize_opts *opts) { struct files_ref_store *refs = files_downcast(ref_store, REF_STORE_WRITE | REF_STORE_ODB, @@ -1488,7 +1488,7 @@ static int files_optimize(struct ref_store *ref_store, iter->ref.name, err.buf); /* Schedule the loose reference for pruning if requested. */ - if ((opts->flags & PACK_REFS_PRUNE)) { + if ((opts->flags & REFS_OPTIMIZE_PRUNE)) { struct ref_to_prune *n; FLEX_ALLOC_STR(n, name, iter->ref.name); oidcpy(&n->oid, iter->ref.oid); diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 20cf9fab18e2e9..10062fd8b63063 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1774,7 +1774,7 @@ static int packed_transaction_finish(struct ref_store *ref_store, } static int packed_optimize(struct ref_store *ref_store UNUSED, - struct pack_refs_opts *pack_opts UNUSED) + struct refs_optimize_opts *opts UNUSED) { /* * Packed refs are already packed. It might be that loose refs diff --git a/refs/refs-internal.h b/refs/refs-internal.h index fc5149df5b3c5c..dee42f231dbd0a 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -423,7 +423,7 @@ typedef int ref_transaction_commit_fn(struct ref_store *refs, struct strbuf *err); typedef int optimize_fn(struct ref_store *ref_store, - struct pack_refs_opts *opts); + struct refs_optimize_opts *opts); typedef int rename_ref_fn(struct ref_store *ref_store, const char *oldref, const char *newref, const char *logmsg); diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 43cc66a48e9143..c23c45f3bf4639 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1701,7 +1701,7 @@ static int reftable_be_transaction_finish(struct ref_store *ref_store UNUSED, } static int reftable_be_optimize(struct ref_store *ref_store, - struct pack_refs_opts *opts) + struct refs_optimize_opts *opts) { struct reftable_ref_store *refs = reftable_be_downcast(ref_store, REF_STORE_WRITE | REF_STORE_ODB, "optimize_refs"); @@ -1715,7 +1715,7 @@ static int reftable_be_optimize(struct ref_store *ref_store, if (!stack) stack = refs->main_backend.stack; - if (opts->flags & PACK_REFS_AUTO) + if (opts->flags & REFS_OPTIMIZE_AUTO) ret = reftable_stack_auto_compact(stack); else ret = reftable_stack_compact_all(stack, NULL); From c113f4ca4dbba5529af645f7d7837c7dae12b403 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Mon, 20 Oct 2025 10:18:31 +0200 Subject: [PATCH 042/553] t/pack-refs-tests: move the 'test_done' to callees In ac0bad0af4 (t0601: refactor tests to be shareable, 2025-09-19), we refactored 't/t0601-reffiles-pack-refs.sh' to move all of the tests to 't/pack-refs-tests.sh', which became a common test suite which was also used by 't/t1463-refs-optimize.sh'. This also moved the 'test_done' directive to 't/pack-refs-tests.sh'. Which inhibits additional tests from being added to either of the tests. Let's move the directive out to both the tests, so that we can add additional specific tests to them. Also the test flow logic shouldn't be part of tests which can be embedded in other test scripts. Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- t/pack-refs-tests.sh | 2 -- t/t0601-reffiles-pack-refs.sh | 2 ++ t/t1463-refs-optimize.sh | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/t/pack-refs-tests.sh b/t/pack-refs-tests.sh index 095823d915fd63..81086c369089b9 100644 --- a/t/pack-refs-tests.sh +++ b/t/pack-refs-tests.sh @@ -459,5 +459,3 @@ test_expect_success 'pack-refs does not store invalid peeled tag value' ' test_grep ! "^\^" .git/packed-refs ) ' - -test_done diff --git a/t/t0601-reffiles-pack-refs.sh b/t/t0601-reffiles-pack-refs.sh index 12cf5d1dcba814..3c706978efc219 100755 --- a/t/t0601-reffiles-pack-refs.sh +++ b/t/t0601-reffiles-pack-refs.sh @@ -18,3 +18,5 @@ export GIT_TEST_DEFAULT_REF_FORMAT . ./test-lib.sh . "$TEST_DIRECTORY"/pack-refs-tests.sh + +test_done diff --git a/t/t1463-refs-optimize.sh b/t/t1463-refs-optimize.sh index c11c905d795d26..9afe3c1ed7e33a 100755 --- a/t/t1463-refs-optimize.sh +++ b/t/t1463-refs-optimize.sh @@ -15,3 +15,5 @@ export GIT_TEST_DEFAULT_REF_FORMAT pack_refs='refs optimize' . "$TEST_DIRECTORY"/pack-refs-tests.sh + +test_done From e031fa100603af74def6bf2a646c731e4fcd12fc Mon Sep 17 00:00:00 2001 From: Siddharth Asthana Date: Thu, 6 Nov 2025 00:45:59 +0530 Subject: [PATCH 043/553] replay: use die_for_incompatible_opt2() for option validation In preparation for adding the --ref-action option, convert option validation to use die_for_incompatible_opt2(). This helper provides standardized error messages for mutually exclusive options. The following commit introduces --ref-action which will be incompatible with certain other options. Using die_for_incompatible_opt2() now means that commit can cleanly add its validation using the same pattern, keeping the validation logic consistent and maintainable. This also aligns git-replay's option handling with how other Git commands manage option conflicts, using the established die_for_incompatible_opt*() helper family. Signed-off-by: Siddharth Asthana Signed-off-by: Junio C Hamano --- builtin/replay.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/builtin/replay.c b/builtin/replay.c index 6172c8aacc9873..b64fc72063ee8e 100644 --- a/builtin/replay.c +++ b/builtin/replay.c @@ -330,9 +330,9 @@ int cmd_replay(int argc, usage_with_options(replay_usage, replay_options); } - if (advance_name_opt && contained) - die(_("options '%s' and '%s' cannot be used together"), - "--advance", "--contained"); + die_for_incompatible_opt2(!!advance_name_opt, "--advance", + contained, "--contained"); + advance_name = xstrdup_or_null(advance_name_opt); repo_init_revisions(repo, &revs, prefix); From 15cd4ef1f495e51f7db39583b7f562e7170da3d2 Mon Sep 17 00:00:00 2001 From: Siddharth Asthana Date: Thu, 6 Nov 2025 00:46:00 +0530 Subject: [PATCH 044/553] replay: make atomic ref updates the default behavior The git replay command currently outputs update commands that can be piped to update-ref to achieve a rebase, e.g. git replay --onto main topic1..topic2 | git update-ref --stdin This separation had advantages for three special cases: * it made testing easy (when state isn't modified from one step to the next, you don't need to make temporary branches or have undo commands, or try to track the changes) * it provided a natural can-it-rebase-cleanly (and what would it rebase to) capability without automatically updating refs, similar to a --dry-run * it provided a natural low-level tool for the suite of hash-object, mktree, commit-tree, mktag, merge-tree, and update-ref, allowing users to have another building block for experimentation and making new tools However, it should be noted that all three of these are somewhat special cases; users, whether on the client or server side, would almost certainly find it more ergonomic to simply have the updating of refs be the default. For server-side operations in particular, the pipeline architecture creates process coordination overhead. Server implementations that need to perform rebases atomically must maintain additional code to: 1. Spawn and manage a pipeline between git-replay and git-update-ref 2. Coordinate stdout/stderr streams across the pipe boundary 3. Handle partial failure states if the pipeline breaks mid-execution 4. Parse and validate the update-ref command output Change the default behavior to update refs directly, and atomically (at least to the extent supported by the refs backend in use). This eliminates the process coordination overhead for the common case. For users needing the traditional pipeline workflow, add a new --ref-action= option that preserves the original behavior: git replay --ref-action=print --onto main topic1..topic2 | git update-ref --stdin The mode can be: * update (default): Update refs directly using an atomic transaction * print: Output update-ref commands for pipeline use Test suite changes: All existing tests that expected command output now use --ref-action=print to preserve their original behavior. This keeps the tests valid while allowing them to verify that the pipeline workflow still works correctly. New tests were added to verify: - Default atomic behavior (no output, refs updated directly) - Bare repository support (server-side use case) - Equivalence between traditional pipeline and atomic updates - Real atomicity using a lock file to verify all-or-nothing guarantee - Test isolation using test_when_finished to clean up state - Reflog messages include replay mode and target A following commit will add a replay.refAction configuration option for users who prefer the traditional pipeline output as their default behavior. Helped-by: Elijah Newren Helped-by: Patrick Steinhardt Helped-by: Christian Couder Helped-by: Phillip Wood Signed-off-by: Siddharth Asthana Signed-off-by: Junio C Hamano --- Documentation/git-replay.adoc | 61 ++++++++++++------- builtin/replay.c | 111 +++++++++++++++++++++++++++++++--- t/t3650-replay-basics.sh | 67 +++++++++++++++++--- 3 files changed, 199 insertions(+), 40 deletions(-) diff --git a/Documentation/git-replay.adoc b/Documentation/git-replay.adoc index 0b12bf8aa4df42..2ef74ddb127b0c 100644 --- a/Documentation/git-replay.adoc +++ b/Documentation/git-replay.adoc @@ -9,15 +9,16 @@ git-replay - EXPERIMENTAL: Replay commits on a new base, works with bare repos t SYNOPSIS -------- [verse] -(EXPERIMENTAL!) 'git replay' ([--contained] --onto | --advance ) ... +(EXPERIMENTAL!) 'git replay' ([--contained] --onto | --advance ) [--ref-action[=]] ... DESCRIPTION ----------- Takes ranges of commits and replays them onto a new location. Leaves -the working tree and the index untouched, and updates no references. -The output of this command is meant to be used as input to -`git update-ref --stdin`, which would update the relevant branches +the working tree and the index untouched. By default, updates the +relevant references using an atomic transaction (all refs update or +none). Use `--ref-action=print` to avoid automatic ref updates and +instead get update commands that can be piped to `git update-ref --stdin` (see the OUTPUT section below). THIS COMMAND IS EXPERIMENTAL. THE BEHAVIOR MAY CHANGE. @@ -29,18 +30,27 @@ OPTIONS Starting point at which to create the new commits. May be any valid commit, and not just an existing branch name. + -When `--onto` is specified, the update-ref command(s) in the output will -update the branch(es) in the revision range to point at the new -commits, similar to the way how `git rebase --update-refs` updates -multiple branches in the affected range. +When `--onto` is specified, the branch(es) in the revision range will be +updated to point at the new commits, similar to the way `git rebase --update-refs` +updates multiple branches in the affected range. --advance :: Starting point at which to create the new commits; must be a branch name. + -When `--advance` is specified, the update-ref command(s) in the output -will update the branch passed as an argument to `--advance` to point at -the new commits (in other words, this mimics a cherry-pick operation). +The history is replayed on top of the and is updated to +point at the tip of the resulting history. This is different from `--onto`, +which uses the target only as a starting point without updating it. + +--ref-action[=]:: + Control how references are updated. The mode can be: ++ +-- + * `update` (default): Update refs directly using an atomic transaction. + All refs are updated or none are (all-or-nothing behavior). + * `print`: Output update-ref commands for pipeline use. This is the + traditional behavior where output can be piped to `git update-ref --stdin`. +-- :: Range of commits to replay. More than one can @@ -54,8 +64,11 @@ include::rev-list-options.adoc[] OUTPUT ------ -When there are no conflicts, the output of this command is usable as -input to `git update-ref --stdin`. It is of the form: +By default, or with `--ref-action=update`, this command produces no output on +success, as refs are updated directly using an atomic transaction. + +When using `--ref-action=print`, the output is usable as input to +`git update-ref --stdin`. It is of the form: update refs/heads/branch1 ${NEW_branch1_HASH} ${OLD_branch1_HASH} update refs/heads/branch2 ${NEW_branch2_HASH} ${OLD_branch2_HASH} @@ -81,6 +94,14 @@ To simply rebase `mybranch` onto `target`: ------------ $ git replay --onto target origin/main..mybranch +------------ + +The refs are updated atomically and no output is produced on success. + +To see what would be updated without actually updating: + +------------ +$ git replay --ref-action=print --onto target origin/main..mybranch update refs/heads/mybranch ${NEW_mybranch_HASH} ${OLD_mybranch_HASH} ------------ @@ -88,33 +109,29 @@ To cherry-pick the commits from mybranch onto target: ------------ $ git replay --advance target origin/main..mybranch -update refs/heads/target ${NEW_target_HASH} ${OLD_target_HASH} ------------ Note that the first two examples replay the exact same commits and on top of the exact same new base, they only differ in that the first -provides instructions to make mybranch point at the new commits and -the second provides instructions to make target point at them. +updates mybranch to point at the new commits and the second updates +target to point at them. What if you have a stack of branches, one depending upon another, and you'd really like to rebase the whole set? ------------ $ git replay --contained --onto origin/main origin/main..tipbranch -update refs/heads/branch1 ${NEW_branch1_HASH} ${OLD_branch1_HASH} -update refs/heads/branch2 ${NEW_branch2_HASH} ${OLD_branch2_HASH} -update refs/heads/tipbranch ${NEW_tipbranch_HASH} ${OLD_tipbranch_HASH} ------------ +All three branches (`branch1`, `branch2`, and `tipbranch`) are updated +atomically. + When calling `git replay`, one does not need to specify a range of commits to replay using the syntax `A..B`; any range expression will do: ------------ $ git replay --onto origin/main ^base branch1 branch2 branch3 -update refs/heads/branch1 ${NEW_branch1_HASH} ${OLD_branch1_HASH} -update refs/heads/branch2 ${NEW_branch2_HASH} ${OLD_branch2_HASH} -update refs/heads/branch3 ${NEW_branch3_HASH} ${OLD_branch3_HASH} ------------ This will simultaneously rebase `branch1`, `branch2`, and `branch3`, diff --git a/builtin/replay.c b/builtin/replay.c index b64fc72063ee8e..94e60b5b107516 100644 --- a/builtin/replay.c +++ b/builtin/replay.c @@ -20,6 +20,11 @@ #include #include +enum ref_action_mode { + REF_ACTION_UPDATE, + REF_ACTION_PRINT, +}; + static const char *short_commit_name(struct repository *repo, struct commit *commit) { @@ -284,6 +289,38 @@ static struct commit *pick_regular_commit(struct repository *repo, return create_commit(repo, result->tree, pickme, replayed_base); } +static enum ref_action_mode parse_ref_action_mode(const char *ref_action, const char *source) +{ + if (!ref_action || !strcmp(ref_action, "update")) + return REF_ACTION_UPDATE; + if (!strcmp(ref_action, "print")) + return REF_ACTION_PRINT; + die(_("invalid %s value: '%s'"), source, ref_action); +} + +static int handle_ref_update(enum ref_action_mode mode, + struct ref_transaction *transaction, + const char *refname, + const struct object_id *new_oid, + const struct object_id *old_oid, + const char *reflog_msg, + struct strbuf *err) +{ + switch (mode) { + case REF_ACTION_PRINT: + printf("update %s %s %s\n", + refname, + oid_to_hex(new_oid), + oid_to_hex(old_oid)); + return 0; + case REF_ACTION_UPDATE: + return ref_transaction_update(transaction, refname, new_oid, old_oid, + NULL, NULL, 0, reflog_msg, err); + default: + BUG("unknown ref_action_mode %d", mode); + } +} + int cmd_replay(int argc, const char **argv, const char *prefix, @@ -294,6 +331,8 @@ int cmd_replay(int argc, struct commit *onto = NULL; const char *onto_name = NULL; int contained = 0; + const char *ref_action = NULL; + enum ref_action_mode ref_mode = REF_ACTION_UPDATE; struct rev_info revs; struct commit *last_commit = NULL; @@ -302,12 +341,15 @@ int cmd_replay(int argc, struct merge_result result; struct strset *update_refs = NULL; kh_oid_map_t *replayed_commits; + struct ref_transaction *transaction = NULL; + struct strbuf transaction_err = STRBUF_INIT; + struct strbuf reflog_msg = STRBUF_INIT; int ret = 0; - const char * const replay_usage[] = { + const char *const replay_usage[] = { N_("(EXPERIMENTAL!) git replay " "([--contained] --onto | --advance ) " - "..."), + "[--ref-action[=]] ..."), NULL }; struct option replay_options[] = { @@ -319,6 +361,9 @@ int cmd_replay(int argc, N_("replay onto given commit")), OPT_BOOL(0, "contained", &contained, N_("advance all branches contained in revision-range")), + OPT_STRING(0, "ref-action", &ref_action, + N_("mode"), + N_("control ref update behavior (update|print)")), OPT_END() }; @@ -333,6 +378,10 @@ int cmd_replay(int argc, die_for_incompatible_opt2(!!advance_name_opt, "--advance", contained, "--contained"); + /* Parse ref action mode */ + if (ref_action) + ref_mode = parse_ref_action_mode(ref_action, "--ref-action"); + advance_name = xstrdup_or_null(advance_name_opt); repo_init_revisions(repo, &revs, prefix); @@ -389,6 +438,24 @@ int cmd_replay(int argc, determine_replay_mode(repo, &revs.cmdline, onto_name, &advance_name, &onto, &update_refs); + /* Build reflog message */ + if (advance_name_opt) + strbuf_addf(&reflog_msg, "replay --advance %s", advance_name_opt); + else + strbuf_addf(&reflog_msg, "replay --onto %s", + oid_to_hex(&onto->object.oid)); + + /* Initialize ref transaction if using update mode */ + if (ref_mode == REF_ACTION_UPDATE) { + transaction = ref_store_transaction_begin(get_main_ref_store(repo), + 0, &transaction_err); + if (!transaction) { + ret = error(_("failed to begin ref transaction: %s"), + transaction_err.buf); + goto cleanup; + } + } + if (!onto) /* FIXME: Should handle replaying down to root commit */ die("Replaying down to root commit is not supported yet!"); @@ -434,10 +501,16 @@ int cmd_replay(int argc, if (decoration->type == DECORATION_REF_LOCAL && (contained || strset_contains(update_refs, decoration->name))) { - printf("update %s %s %s\n", - decoration->name, - oid_to_hex(&last_commit->object.oid), - oid_to_hex(&commit->object.oid)); + if (handle_ref_update(ref_mode, transaction, + decoration->name, + &last_commit->object.oid, + &commit->object.oid, + reflog_msg.buf, + &transaction_err) < 0) { + ret = error(_("failed to update ref '%s': %s"), + decoration->name, transaction_err.buf); + goto cleanup; + } } decoration = decoration->next; } @@ -445,10 +518,24 @@ int cmd_replay(int argc, /* In --advance mode, advance the target ref */ if (result.clean == 1 && advance_name) { - printf("update %s %s %s\n", - advance_name, - oid_to_hex(&last_commit->object.oid), - oid_to_hex(&onto->object.oid)); + if (handle_ref_update(ref_mode, transaction, advance_name, + &last_commit->object.oid, + &onto->object.oid, + reflog_msg.buf, + &transaction_err) < 0) { + ret = error(_("failed to update ref '%s': %s"), + advance_name, transaction_err.buf); + goto cleanup; + } + } + + /* Commit the ref transaction if we have one */ + if (transaction && result.clean == 1) { + if (ref_transaction_commit(transaction, &transaction_err)) { + ret = error(_("failed to commit ref transaction: %s"), + transaction_err.buf); + goto cleanup; + } } merge_finalize(&merge_opt, &result); @@ -460,6 +547,10 @@ int cmd_replay(int argc, ret = result.clean; cleanup: + if (transaction) + ref_transaction_free(transaction); + strbuf_release(&transaction_err); + strbuf_release(&reflog_msg); release_revisions(&revs); free(advance_name); diff --git a/t/t3650-replay-basics.sh b/t/t3650-replay-basics.sh index 58b37599357827..ec79234c8044c7 100755 --- a/t/t3650-replay-basics.sh +++ b/t/t3650-replay-basics.sh @@ -52,7 +52,7 @@ test_expect_success 'setup bare' ' ' test_expect_success 'using replay to rebase two branches, one on top of other' ' - git replay --onto main topic1..topic2 >result && + git replay --ref-action=print --onto main topic1..topic2 >result && test_line_count = 1 result && @@ -68,7 +68,7 @@ test_expect_success 'using replay to rebase two branches, one on top of other' ' ' test_expect_success 'using replay on bare repo to rebase two branches, one on top of other' ' - git -C bare replay --onto main topic1..topic2 >result-bare && + git -C bare replay --ref-action=print --onto main topic1..topic2 >result-bare && test_cmp expect result-bare ' @@ -86,7 +86,7 @@ test_expect_success 'using replay to perform basic cherry-pick' ' # 2nd field of result is refs/heads/main vs. refs/heads/topic2 # 4th field of result is hash for main instead of hash for topic2 - git replay --advance main topic1..topic2 >result && + git replay --ref-action=print --advance main topic1..topic2 >result && test_line_count = 1 result && @@ -102,7 +102,7 @@ test_expect_success 'using replay to perform basic cherry-pick' ' ' test_expect_success 'using replay on bare repo to perform basic cherry-pick' ' - git -C bare replay --advance main topic1..topic2 >result-bare && + git -C bare replay --ref-action=print --advance main topic1..topic2 >result-bare && test_cmp expect result-bare ' @@ -115,7 +115,7 @@ test_expect_success 'replay fails when both --advance and --onto are omitted' ' ' test_expect_success 'using replay to also rebase a contained branch' ' - git replay --contained --onto main main..topic3 >result && + git replay --ref-action=print --contained --onto main main..topic3 >result && test_line_count = 2 result && cut -f 3 -d " " result >new-branch-tips && @@ -139,12 +139,12 @@ test_expect_success 'using replay to also rebase a contained branch' ' ' test_expect_success 'using replay on bare repo to also rebase a contained branch' ' - git -C bare replay --contained --onto main main..topic3 >result-bare && + git -C bare replay --ref-action=print --contained --onto main main..topic3 >result-bare && test_cmp expect result-bare ' test_expect_success 'using replay to rebase multiple divergent branches' ' - git replay --onto main ^topic1 topic2 topic4 >result && + git replay --ref-action=print --onto main ^topic1 topic2 topic4 >result && test_line_count = 2 result && cut -f 3 -d " " result >new-branch-tips && @@ -168,7 +168,7 @@ test_expect_success 'using replay to rebase multiple divergent branches' ' ' test_expect_success 'using replay on bare repo to rebase multiple divergent branches, including contained ones' ' - git -C bare replay --contained --onto main ^main topic2 topic3 topic4 >result && + git -C bare replay --ref-action=print --contained --onto main ^main topic2 topic3 topic4 >result && test_line_count = 4 result && cut -f 3 -d " " result >new-branch-tips && @@ -217,4 +217,55 @@ test_expect_success 'merge.directoryRenames=false' ' --onto rename-onto rename-onto..rename-from ' +test_expect_success 'default atomic behavior updates refs directly' ' + # Use a separate branch to avoid contaminating topic2 for later tests + git branch test-atomic topic2 && + test_when_finished "git branch -D test-atomic" && + + # Test default atomic behavior (no output, refs updated) + git replay --onto main topic1..test-atomic >output && + test_must_be_empty output && + + # Verify ref was updated + git log --format=%s test-atomic >actual && + test_write_lines E D M L B A >expect && + test_cmp expect actual && + + # Verify reflog message includes SHA of onto commit + git reflog test-atomic -1 --format=%gs >reflog-msg && + ONTO_SHA=$(git rev-parse main) && + echo "replay --onto $ONTO_SHA" >expect-reflog && + test_cmp expect-reflog reflog-msg +' + +test_expect_success 'atomic behavior in bare repository' ' + # Store original state for cleanup + START=$(git -C bare rev-parse topic2) && + test_when_finished "git -C bare update-ref refs/heads/topic2 $START" && + + # Test atomic updates work in bare repo + git -C bare replay --onto main topic1..topic2 >output && + test_must_be_empty output && + + # Verify ref was updated in bare repo + git -C bare log --format=%s topic2 >actual && + test_write_lines E D M L B A >expect && + test_cmp expect actual +' + +test_expect_success 'reflog message for --advance mode' ' + # Store original state + START=$(git rev-parse main) && + test_when_finished "git update-ref refs/heads/main $START" && + + # Test --advance mode reflog message + git replay --advance main topic1..topic2 >output && + test_must_be_empty output && + + # Verify reflog message includes --advance and branch name + git reflog main -1 --format=%gs >reflog-msg && + echo "replay --advance main" >expect-reflog && + test_cmp expect-reflog reflog-msg +' + test_done From 336ac90c06ec757f613faae4ffc6c32578a99cd1 Mon Sep 17 00:00:00 2001 From: Siddharth Asthana Date: Thu, 6 Nov 2025 00:46:01 +0530 Subject: [PATCH 045/553] replay: add replay.refAction config option Add a configuration variable to control the default behavior of git replay for updating references. This allows users who prefer the traditional pipeline output to set it once in their config instead of passing --ref-action=print with every command. The config variable uses string values that mirror the behavior modes: * replay.refAction = update (default): atomic ref updates * replay.refAction = print: output commands for pipeline Helped-by: Junio C Hamano Helped-by: Elijah Newren Helped-by: Christian Couder Helped-by: Phillip Wood Signed-off-by: Siddharth Asthana Signed-off-by: Junio C Hamano --- Documentation/config/replay.adoc | 11 ++++++++ Documentation/git-replay.adoc | 2 ++ builtin/replay.c | 24 ++++++++++++++--- t/t3650-replay-basics.sh | 46 ++++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 4 deletions(-) create mode 100644 Documentation/config/replay.adoc diff --git a/Documentation/config/replay.adoc b/Documentation/config/replay.adoc new file mode 100644 index 00000000000000..7d549d2f0e5195 --- /dev/null +++ b/Documentation/config/replay.adoc @@ -0,0 +1,11 @@ +replay.refAction:: + Specifies the default mode for handling reference updates in + `git replay`. The value can be: ++ +-- + * `update`: Update refs directly using an atomic transaction (default behavior). + * `print`: Output update-ref commands for pipeline use. +-- ++ +This setting can be overridden with the `--ref-action` command-line option. +When not configured, `git replay` defaults to `update` mode. diff --git a/Documentation/git-replay.adoc b/Documentation/git-replay.adoc index 2ef74ddb127b0c..dcb26e8a8e88ca 100644 --- a/Documentation/git-replay.adoc +++ b/Documentation/git-replay.adoc @@ -51,6 +51,8 @@ which uses the target only as a starting point without updating it. * `print`: Output update-ref commands for pipeline use. This is the traditional behavior where output can be piped to `git update-ref --stdin`. -- ++ +The default mode can be configured via the `replay.refAction` configuration variable. :: Range of commits to replay. More than one can diff --git a/builtin/replay.c b/builtin/replay.c index 94e60b5b107516..6606a2c94bc671 100644 --- a/builtin/replay.c +++ b/builtin/replay.c @@ -8,6 +8,7 @@ #include "git-compat-util.h" #include "builtin.h" +#include "config.h" #include "environment.h" #include "hex.h" #include "lockfile.h" @@ -298,6 +299,22 @@ static enum ref_action_mode parse_ref_action_mode(const char *ref_action, const die(_("invalid %s value: '%s'"), source, ref_action); } +static enum ref_action_mode get_ref_action_mode(struct repository *repo, const char *ref_action) +{ + const char *config_value = NULL; + + /* Command line option takes precedence */ + if (ref_action) + return parse_ref_action_mode(ref_action, "--ref-action"); + + /* Check config value */ + if (!repo_config_get_string_tmp(repo, "replay.refAction", &config_value)) + return parse_ref_action_mode(config_value, "replay.refAction"); + + /* Default to update mode */ + return REF_ACTION_UPDATE; +} + static int handle_ref_update(enum ref_action_mode mode, struct ref_transaction *transaction, const char *refname, @@ -332,7 +349,7 @@ int cmd_replay(int argc, const char *onto_name = NULL; int contained = 0; const char *ref_action = NULL; - enum ref_action_mode ref_mode = REF_ACTION_UPDATE; + enum ref_action_mode ref_mode; struct rev_info revs; struct commit *last_commit = NULL; @@ -378,9 +395,8 @@ int cmd_replay(int argc, die_for_incompatible_opt2(!!advance_name_opt, "--advance", contained, "--contained"); - /* Parse ref action mode */ - if (ref_action) - ref_mode = parse_ref_action_mode(ref_action, "--ref-action"); + /* Parse ref action mode from command line or config */ + ref_mode = get_ref_action_mode(repo, ref_action); advance_name = xstrdup_or_null(advance_name_opt); diff --git a/t/t3650-replay-basics.sh b/t/t3650-replay-basics.sh index ec79234c8044c7..cf3aacf3551f8e 100755 --- a/t/t3650-replay-basics.sh +++ b/t/t3650-replay-basics.sh @@ -268,4 +268,50 @@ test_expect_success 'reflog message for --advance mode' ' test_cmp expect-reflog reflog-msg ' +test_expect_success 'replay.refAction=print config option' ' + # Store original state + START=$(git rev-parse topic2) && + test_when_finished "git branch -f topic2 $START" && + + # Test with config set to print + test_config replay.refAction print && + git replay --onto main topic1..topic2 >output && + test_line_count = 1 output && + test_grep "^update refs/heads/topic2 " output +' + +test_expect_success 'replay.refAction=update config option' ' + # Store original state + START=$(git rev-parse topic2) && + test_when_finished "git branch -f topic2 $START" && + + # Test with config set to update + test_config replay.refAction update && + git replay --onto main topic1..topic2 >output && + test_must_be_empty output && + + # Verify ref was updated + git log --format=%s topic2 >actual && + test_write_lines E D M L B A >expect && + test_cmp expect actual +' + +test_expect_success 'command-line --ref-action overrides config' ' + # Store original state + START=$(git rev-parse topic2) && + test_when_finished "git branch -f topic2 $START" && + + # Set config to update but use --ref-action=print + test_config replay.refAction update && + git replay --ref-action=print --onto main topic1..topic2 >output && + test_line_count = 1 output && + test_grep "^update refs/heads/topic2 " output +' + +test_expect_success 'invalid replay.refAction value' ' + test_config replay.refAction invalid && + test_must_fail git replay --onto main topic1..topic2 2>error && + test_grep "invalid.*replay.refAction.*value" error +' + test_done From 77e7aab693daee402ef37323715207c5a2daec9f Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Thu, 6 Nov 2025 09:20:41 +0100 Subject: [PATCH 046/553] gitk: fix a 'continue' statement outside a loop to 'return' When 5de460a2cfdd (gitk: Refactor per-line part of getblobdiffline and its support) moved the body of a loop into a separate function, several 'continue' statements were changed to 'return'. But one instance was missed. Fix it now. Signed-off-by: Johannes Sixt --- gitk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gitk b/gitk index c02db0194d5317..eb657aa1e491b4 100755 --- a/gitk +++ b/gitk @@ -8296,7 +8296,7 @@ proc parseblobdiffline {ids line} { if {![regexp {^diff (--cc|--git) } $line m type]} { set line [convertfrom utf-8 $line] $ctext insert end "$line\n" hunksep - continue + return } # start of a new file set diffinhdr 1 From d445a78873423c55b79f97361a082272acd17f7b Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Thu, 6 Nov 2025 10:42:37 +0100 Subject: [PATCH 047/553] gitk: show unescaped file names on 'rename' and 'copy' lines When a file is selected in the file list, the diff window scrolls to the corresponding section. The administrative data needed for this purpose is extracted from the 'rename from', 'rename to', and 'copy to' lines. Escaped file names are unescaped for this purpose. However, the lines shown in the diff window are left in the escaped form. This is not very pleasing. Replace the escaped form by the unescaped form. Add a section to treat the 'copy from' case. Signed-off-by: Johannes Sixt --- gitk | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/gitk b/gitk index eb657aa1e491b4..9e4f113c9bc142 100755 --- a/gitk +++ b/gitk @@ -8401,6 +8401,7 @@ proc parseblobdiffline {ids line} { if {$i >= 0} { setinlist difffilestart $i $curdiffstart } + set line "rename from $fname" } elseif {![string compare -length 10 $line "rename to "] || ![string compare -length 8 $line "copy to "]} { set fname [string range $line [expr 4 + [string first " to " $line] ] end] @@ -8408,6 +8409,13 @@ proc parseblobdiffline {ids line} { set fname [lindex $fname 0] } makediffhdr $fname $ids + set line "[lindex $line 0] to $fname" + } elseif {![string compare -length 10 $line "copy from "]} { + set fname [string range $line 10 end] + if {[string index $fname 0] eq "\""} { + set fname [lindex $fname 0] + } + set line "copy from $fname" } elseif {[string compare -length 3 $line "---"] == 0} { # do nothing return From 46207a54cca1402532f6e658503a9f6b7ad36fb8 Mon Sep 17 00:00:00 2001 From: Queen Ediri Jessa Date: Wed, 5 Nov 2025 15:38:49 +0100 Subject: [PATCH 048/553] doc: clarify server behavior for invalid 'want' lines in HTTP protocol Update the documentation to clearly describe how the server responds when a client sends an invalid or malformed `want` line during the HTTP protocol exchange. The server includes the offending object name in its error message. Signed-off-by: Queen Ediri Jessa Signed-off-by: Junio C Hamano --- Documentation/gitprotocol-http.adoc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/gitprotocol-http.adoc b/Documentation/gitprotocol-http.adoc index d024010414aa6d..e2ef7f045953d8 100644 --- a/Documentation/gitprotocol-http.adoc +++ b/Documentation/gitprotocol-http.adoc @@ -443,7 +443,8 @@ If no "want" objects are received, send an error: TODO: Define error if no "want" lines are requested. If any "want" object is not reachable, send an error: -TODO: Define error if an invalid "want" is requested. +When a Git server receives an invalid or malformed `want` line, it +responds with an error message that includes the offending object name. Create an empty list, `s_common`. From bdb1cf831251b16d174f742178caac181add87f4 Mon Sep 17 00:00:00 2001 From: Tobias Boesch Date: Thu, 6 Nov 2025 14:42:11 +0000 Subject: [PATCH 049/553] gitk: add external diff file rename detection If a file is renamed between commits and an external diff is started through gitk on the original or the renamed file name, gitk is unable to open the renamed file in the external diff editor. It fails to fetch the renamed file from git, because it fetches it using its original path in contrast to using the renamed path of the file. Detect the rename and open the external diff with the original and the renamed file instead of no file (fetch the renamed file path and name from git) no matter if the original or the renamed file is selected in gitk. Signed-off-by: Tobias Boesch Signed-off-by: Johannes Sixt --- gitk | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/gitk b/gitk index bc9efa18566fb8..9659466052e91a 100755 --- a/gitk +++ b/gitk @@ -3806,6 +3806,34 @@ proc external_diff_get_one_file {diffid filename diffdir} { "revision $diffid"] } +proc check_for_renames_in_diff {filepath} { # renames + global difffilestart ctext + + set filename [file tail $filepath] + set renames {} + + foreach loc $difffilestart { + set loclineend [string map {.0 .end} $loc] + set fromlineloc "$loc + 2 lines" + set tolineloc "$loc + 3 lines" + set renfromline [$ctext get $fromlineloc [string map {.0 .end} $fromlineloc]] + set rentoline [$ctext get $tolineloc [string map {.0 .end} $tolineloc]] + if {[string equal -length 12 "rename from " $renfromline] + && [string equal -length 10 "rename to " $rentoline]} { + set renfrom [string range $renfromline 12 end] + set rento [string range $rentoline 10 end] + if {[string first $filename $renfrom] != -1 + || [string first $filename $rento] != -1} { + lappend renames $renfrom + lappend renames $rento + break + } + } + } + + return $renames +} + proc external_diff {} { global nullid nullid2 global flist_menu_file @@ -3836,8 +3864,16 @@ proc external_diff {} { if {$diffdir eq {}} return # gather files to diff - set difffromfile [external_diff_get_one_file $diffidfrom $flist_menu_file $diffdir] - set difftofile [external_diff_get_one_file $diffidto $flist_menu_file $diffdir] + set renames [check_for_renames_in_diff $flist_menu_file] + set renamefrom [lindex $renames 0] + set renameto [lindex $renames 1] + if {$renamefrom ne {} && $renameto ne {}} { + set difffromfile [external_diff_get_one_file $diffidfrom $renamefrom $diffdir] + set difftofile [external_diff_get_one_file $diffidto $renameto $diffdir] + } else { + set difffromfile [external_diff_get_one_file $diffidfrom $flist_menu_file $diffdir] + set difftofile [external_diff_get_one_file $diffidto $flist_menu_file $diffdir] + } if {$difffromfile ne {} && $difftofile ne {}} { set cmd [list [shellsplit $extdifftool] $difffromfile $difftofile] From 7048e74609fbef2c91bfa3a80e3a9c4fc0ac04c9 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 6 Nov 2025 09:52:54 +0100 Subject: [PATCH 050/553] object: fix performance regression when peeling tags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our Bencher dashboards [1] have recently alerted us about a bunch of performance regressions when writing references, specifically with the reftable backend. There is a 3x regression when writing many refs with preexisting refs in the reftable format, and a 10x regression when migrating refs between backends in either of the formats. Bisecting the issue lands us at 6ec4c0b45b (refs: don't store peeled object IDs for invalid tags, 2025-10-23). The gist of the commit is that we may end up storing peeled objects in both reftables and packed-refs for corrupted tags, where the claimed tagged object type is different than the actual tagged object type. This will then cause us to create the `struct object *` with a wrong type, as well, and obviously nothing good comes out of that. The fix for this issue was to introduce a new flag to `peel_object()` that causes us to verify the tagged object's type before writing it into the refdb -- if the tag is corrupt, we skip writing the peeled value. To verify whether the peeled value is correct we have to look up the object type via the ODB and compare the actual type with the claimed type, and that additional object lookup is costly. This also explains why we see the regression only when writing refs with the reftable backend, but we see the regression with both backends when migrating refs: - The reftable backend knows to store peeled values in the new table immediately, so it has to try and peel each ref it's about to write to the transaction. So the performance regression is visible for all writes. - The files backend only stores peeled values when writing the packed-refs file, so it wouldn't hit the performance regression for normal writes. But on ref migrations we know to write all new values into the packed-refs file immediately, and that's why we see the regression for both backends there. Taking a step back though reveals an oddity in the new verification logic: we not only verify the _tagged_ object's type, but we also verify the type of the tag itself. But this isn't really needed, as we wouldn't hit the bug in such a case anyway, as we only hit the issue with corrupt tags claiming an invalid type for the tagged object. The consequence of this is that we now started to look up the target object of every single reference we're about to write, regardless of whether it even is a tag or not. And that is of course quite costly. Fix the issue by only verifying the type of the tagged objects. This means that we of course still have a performance hit for actual tags. But this only happens for writes anyway, and I'd claim it's preferable to not store corrupted data in the refdb than to be fast here. Rename the flag accordingly to clarify that we only verify the tagged object's type. This fix brings performance back to previous levels: Benchmark 1: baseline Time (mean ± σ): 46.0 ms ± 0.4 ms [User: 40.0 ms, System: 5.7 ms] Range (min … max): 45.0 ms … 47.1 ms 54 runs Benchmark 2: regression Time (mean ± σ): 140.2 ms ± 1.3 ms [User: 77.5 ms, System: 60.5 ms] Range (min … max): 138.0 ms … 142.7 ms 20 runs Benchmark 3: fix Time (mean ± σ): 46.2 ms ± 0.4 ms [User: 40.2 ms, System: 5.7 ms] Range (min … max): 45.0 ms … 47.3 ms 55 runs Summary update-ref: baseline 1.00 ± 0.01 times faster than fix 3.05 ± 0.04 times faster than regression [1]: https://bencher.dev/perf/git/plots Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object.c | 4 ++-- object.h | 12 ++++++------ ref-filter.c | 2 +- refs/packed-backend.c | 2 +- refs/reftable-backend.c | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/object.c b/object.c index e72b0ed4360e67..b08fc7a163ae69 100644 --- a/object.c +++ b/object.c @@ -214,7 +214,7 @@ enum peel_status peel_object(struct repository *r, { struct object *o = lookup_unknown_object(r, name); - if (o->type == OBJ_NONE || flags & PEEL_OBJECT_VERIFY_OBJECT_TYPE) { + if (o->type == OBJ_NONE) { int type = odb_read_object_info(r->objects, name, NULL); if (type < 0 || !object_as_type(o, type, 0)) return PEEL_INVALID; @@ -228,7 +228,7 @@ enum peel_status peel_object(struct repository *r, if (o && o->type == OBJ_TAG && ((struct tag *)o)->tagged) { o = ((struct tag *)o)->tagged; - if (flags & PEEL_OBJECT_VERIFY_OBJECT_TYPE) { + if (flags & PEEL_OBJECT_VERIFY_TAGGED_OBJECT_TYPE) { int type = odb_read_object_info(r->objects, &o->oid, NULL); if (type < 0 || !object_as_type(o, type, 0)) return PEEL_INVALID; diff --git a/object.h b/object.h index 1499f63d507c32..e9baade1e0ecab 100644 --- a/object.h +++ b/object.h @@ -289,13 +289,13 @@ enum peel_status { enum peel_object_flags { /* - * Always verify the object type, even in the case where the looked-up - * object already has an object type. This can be useful when the - * stored object type may be invalid. One such case is when looking up - * objects via tags, where we blindly trust the object type declared by - * the tag. + * Always verify the object type of the tagged object, even in the case + * where the looked-up object already has an object type. This can be + * useful when the tagged object type may be invalid. One such case is + * when looking up objects via tags, where we blindly trust the object + * type declared by the tag. */ - PEEL_OBJECT_VERIFY_OBJECT_TYPE = (1 << 0), + PEEL_OBJECT_VERIFY_TAGGED_OBJECT_TYPE = (1 << 0), }; /* diff --git a/ref-filter.c b/ref-filter.c index d8667c569a18f1..d7454269e87cd3 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2654,7 +2654,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err) if (!is_null_oid(&ref->peeled_oid)) { oidcpy(&oi_deref.oid, &ref->peeled_oid); } else if (!peel_object(the_repository, &oi.oid, &oi_deref.oid, - PEEL_OBJECT_VERIFY_OBJECT_TYPE)) { + PEEL_OBJECT_VERIFY_TAGGED_OBJECT_TYPE)) { /* We managed to peel the object ourselves. */ } else { die("bad tag"); diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 1ab0c503930164..5aa615011a6db3 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1528,7 +1528,7 @@ static enum ref_transaction_error write_with_updates(struct packed_ref_store *re } else { struct object_id peeled; int peel_error = peel_object(refs->base.repo, &update->new_oid, - &peeled, PEEL_OBJECT_VERIFY_OBJECT_TYPE); + &peeled, PEEL_OBJECT_VERIFY_TAGGED_OBJECT_TYPE); if (write_packed_entry(out, update->refname, &update->new_oid, diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 6bbfd5618dac16..1ac1f6156f256d 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1633,7 +1633,7 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data ref.update_index = ts; peel_error = peel_object(arg->refs->base.repo, &u->new_oid, &peeled, - PEEL_OBJECT_VERIFY_OBJECT_TYPE); + PEEL_OBJECT_VERIFY_TAGGED_OBJECT_TYPE); if (!peel_error) { ref.value_type = REFTABLE_REF_VAL2; memcpy(ref.value.val2.target_value, peeled.hash, GIT_MAX_RAWSZ); From 135f491f83d4763bdc61642eb0126ce2e6ada286 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Sat, 8 Nov 2025 22:51:53 +0100 Subject: [PATCH 051/553] reftable/stack: return stack segments directly The `stack_table_sizes_for_compaction()` function returns individual sizes of each reftable table. This function is only called by `reftable_stack_auto_compact()` to decide which tables need to be compacted, if any. Modify the function to directly return the segments, which avoids the extra step of receiving the sizes only to pass it to `suggest_compaction_segment()`. A future commit will also add functionality for checking whether auto-compaction is necessary without performing it. This change allows code re-usability in that context. Signed-off-by: Karthik Nayak Acked-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/stack.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/reftable/stack.c b/reftable/stack.c index 65d89820bd0748..49387f93446398 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -1626,7 +1626,8 @@ struct segment suggest_compaction_segment(uint64_t *sizes, size_t n, return seg; } -static uint64_t *stack_table_sizes_for_compaction(struct reftable_stack *st) +static int stack_segments_for_compaction(struct reftable_stack *st, + struct segment *seg) { int version = (st->opts.hash_id == REFTABLE_HASH_SHA1) ? 1 : 2; int overhead = header_size(version) - 1; @@ -1634,29 +1635,29 @@ static uint64_t *stack_table_sizes_for_compaction(struct reftable_stack *st) REFTABLE_CALLOC_ARRAY(sizes, st->merged->tables_len); if (!sizes) - return NULL; + return REFTABLE_OUT_OF_MEMORY_ERROR; for (size_t i = 0; i < st->merged->tables_len; i++) sizes[i] = st->tables[i]->size - overhead; - return sizes; + *seg = suggest_compaction_segment(sizes, st->merged->tables_len, + st->opts.auto_compaction_factor); + reftable_free(sizes); + + return 0; } int reftable_stack_auto_compact(struct reftable_stack *st) { struct segment seg; - uint64_t *sizes; + int err; if (st->merged->tables_len < 2) return 0; - sizes = stack_table_sizes_for_compaction(st); - if (!sizes) - return REFTABLE_OUT_OF_MEMORY_ERROR; - - seg = suggest_compaction_segment(sizes, st->merged->tables_len, - st->opts.auto_compaction_factor); - reftable_free(sizes); + err = stack_segments_for_compaction(st, &seg); + if (err) + return err; if (segment_size(&seg) > 0) return stack_compact_range(st, seg.start, seg.end - 1, From e35155588aa9f0355eb7e116ea418c189479f62d Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Sat, 8 Nov 2025 22:51:54 +0100 Subject: [PATCH 052/553] reftable/stack: add function to check if optimization is required The reftable backend performs auto-compaction as part of its regular flow, which is required to keep the number of tables part of a stack at bay. This allows it to stay optimized. Compaction can also be triggered voluntarily by the user via the 'git pack-refs' or the 'git refs optimize' command. However, currently there is no way for the user to check if optimization is required without actually performing it. Extract out the heuristics logic from 'reftable_stack_auto_compact()' into an internal function 'update_segment_if_compaction_required()'. Then use this to add and expose `reftable_stack_compaction_required()` which will allow users to check if the reftable backend can be optimized. Signed-off-by: Karthik Nayak Acked-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/reftable-stack.h | 11 +++++++++ reftable/stack.c | 42 +++++++++++++++++++++++++++++---- t/unit-tests/u-reftable-stack.c | 12 ++++++++-- 3 files changed, 58 insertions(+), 7 deletions(-) diff --git a/reftable/reftable-stack.h b/reftable/reftable-stack.h index d70fcb705dcffe..c2415cbc6e46a6 100644 --- a/reftable/reftable-stack.h +++ b/reftable/reftable-stack.h @@ -123,6 +123,17 @@ struct reftable_log_expiry_config { int reftable_stack_compact_all(struct reftable_stack *st, struct reftable_log_expiry_config *config); +/* + * Check if compaction is required. + * + * When `use_heuristics` is false, check if all tables can be compacted to a + * single table. If true, use heuristics to determine if the tables need to be + * compacted to maintain geometric progression. + */ +int reftable_stack_compaction_required(struct reftable_stack *st, + bool use_heuristics, + bool *required); + /* heuristically compact unbalanced table stack. */ int reftable_stack_auto_compact(struct reftable_stack *st); diff --git a/reftable/stack.c b/reftable/stack.c index 49387f93446398..1c9f21dfe1eb45 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -1647,19 +1647,51 @@ static int stack_segments_for_compaction(struct reftable_stack *st, return 0; } -int reftable_stack_auto_compact(struct reftable_stack *st) +static int update_segment_if_compaction_required(struct reftable_stack *st, + struct segment *seg, + bool use_geometric, + bool *required) { - struct segment seg; int err; - if (st->merged->tables_len < 2) + if (st->merged->tables_len < 2) { + *required = false; + return 0; + } + + if (!use_geometric) { + *required = true; return 0; + } + + err = stack_segments_for_compaction(st, seg); + if (err) + return err; + + *required = segment_size(seg) > 0; + return 0; +} + +int reftable_stack_compaction_required(struct reftable_stack *st, + bool use_heuristics, + bool *required) +{ + struct segment seg; + return update_segment_if_compaction_required(st, &seg, use_heuristics, + required); +} + +int reftable_stack_auto_compact(struct reftable_stack *st) +{ + struct segment seg; + bool required; + int err; - err = stack_segments_for_compaction(st, &seg); + err = update_segment_if_compaction_required(st, &seg, true, &required); if (err) return err; - if (segment_size(&seg) > 0) + if (required) return stack_compact_range(st, seg.start, seg.end - 1, NULL, STACK_COMPACT_RANGE_BEST_EFFORT); diff --git a/t/unit-tests/u-reftable-stack.c b/t/unit-tests/u-reftable-stack.c index a8b91812e89ab0..b8110cdeee664b 100644 --- a/t/unit-tests/u-reftable-stack.c +++ b/t/unit-tests/u-reftable-stack.c @@ -1067,6 +1067,7 @@ void test_reftable_stack__add_performs_auto_compaction(void) .value_type = REFTABLE_REF_SYMREF, .value.symref = (char *) "master", }; + bool required = false; char buf[128]; /* @@ -1087,10 +1088,17 @@ void test_reftable_stack__add_performs_auto_compaction(void) * auto compaction is disabled. When enabled, we should merge * all tables in the stack. */ - if (i != n) + cl_assert_equal_i(reftable_stack_compaction_required(st, true, &required), 0); + if (i != n) { cl_assert_equal_i(st->merged->tables_len, i + 1); - else + if (i < 1) + cl_assert_equal_b(required, false); + else + cl_assert_equal_b(required, true); + } else { cl_assert_equal_i(st->merged->tables_len, 1); + cl_assert_equal_b(required, false); + } } reftable_stack_destroy(st); From f6c5ca387a7693b16158826d157178be0ba439dc Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Sat, 8 Nov 2025 22:51:55 +0100 Subject: [PATCH 053/553] refs: add a `optimize_required` field to `struct ref_storage_be` To allow users of the refs namespace to check if the reference backend requires optimization, add a new field `optimize_required` field to `struct ref_storage_be`. This field is of type `optimize_required_fn` which is also introduced in this commit. Modify the debug, files, packed and reftable backend to implement this field. A following commit will expose this via 'git pack-refs' and 'git refs optimize'. Signed-off-by: Karthik Nayak Acked-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 7 +++++++ refs.h | 7 +++++++ refs/debug.c | 13 +++++++++++++ refs/files-backend.c | 11 +++++++++++ refs/packed-backend.c | 13 +++++++++++++ refs/refs-internal.h | 6 ++++++ refs/reftable-backend.c | 25 +++++++++++++++++++++++++ 7 files changed, 82 insertions(+) diff --git a/refs.c b/refs.c index 0d0831f29ba25b..5583f6e09d7c76 100644 --- a/refs.c +++ b/refs.c @@ -2318,6 +2318,13 @@ int refs_optimize(struct ref_store *refs, struct refs_optimize_opts *opts) return refs->be->optimize(refs, opts); } +int refs_optimize_required(struct ref_store *refs, + struct refs_optimize_opts *opts, + bool *required) +{ + return refs->be->optimize_required(refs, opts, required); +} + int reference_get_peeled_oid(struct repository *repo, const struct reference *ref, struct object_id *peeled_oid) diff --git a/refs.h b/refs.h index 6b05bba527ffca..d9051bbb0414c2 100644 --- a/refs.h +++ b/refs.h @@ -520,6 +520,13 @@ struct refs_optimize_opts { */ int refs_optimize(struct ref_store *refs, struct refs_optimize_opts *opts); +/* + * Check if refs backend can be optimized by calling 'refs_optimize'. + */ +int refs_optimize_required(struct ref_store *ref_store, + struct refs_optimize_opts *opts, + bool *required); + /* * Setup reflog before using. Fill in err and return -1 on failure. */ diff --git a/refs/debug.c b/refs/debug.c index 2defd2d465712e..36f8c58b6c781f 100644 --- a/refs/debug.c +++ b/refs/debug.c @@ -124,6 +124,17 @@ static int debug_optimize(struct ref_store *ref_store, struct refs_optimize_opts return res; } +static int debug_optimize_required(struct ref_store *ref_store, + struct refs_optimize_opts *opts, + bool *required) +{ + struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store; + int res = drefs->refs->be->optimize_required(drefs->refs, opts, required); + trace_printf_key(&trace_refs, "optimize_required: %s, res: %d\n", + required ? "yes" : "no", res); + return res; +} + static int debug_rename_ref(struct ref_store *ref_store, const char *oldref, const char *newref, const char *logmsg) { @@ -431,6 +442,8 @@ struct ref_storage_be refs_be_debug = { .transaction_abort = debug_transaction_abort, .optimize = debug_optimize, + .optimize_required = debug_optimize_required, + .rename_ref = debug_rename_ref, .copy_ref = debug_copy_ref, diff --git a/refs/files-backend.c b/refs/files-backend.c index a1e70b1c10dbb8..6e0c9b340a0f6f 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -1512,6 +1512,16 @@ static int files_optimize(struct ref_store *ref_store, return 0; } +static int files_optimize_required(struct ref_store *ref_store, + struct refs_optimize_opts *opts, + bool *required) +{ + struct files_ref_store *refs = files_downcast(ref_store, REF_STORE_READ, + "optimize_required"); + *required = should_pack_refs(refs, opts); + return 0; +} + /* * People using contrib's git-new-workdir have .git/logs/refs -> * /some/other/path/.git/logs/refs, and that may live on another device. @@ -3982,6 +3992,7 @@ struct ref_storage_be refs_be_files = { .transaction_abort = files_transaction_abort, .optimize = files_optimize, + .optimize_required = files_optimize_required, .rename_ref = files_rename_ref, .copy_ref = files_copy_ref, diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 10062fd8b63063..19ce4d58728e8c 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1784,6 +1784,17 @@ static int packed_optimize(struct ref_store *ref_store UNUSED, return 0; } +static int packed_optimize_required(struct ref_store *ref_store UNUSED, + struct refs_optimize_opts *opts UNUSED, + bool *required) +{ + /* + * Packed refs are already optimized. + */ + *required = false; + return 0; +} + static struct ref_iterator *packed_reflog_iterator_begin(struct ref_store *ref_store UNUSED) { return empty_ref_iterator_begin(); @@ -2130,6 +2141,8 @@ struct ref_storage_be refs_be_packed = { .transaction_abort = packed_transaction_abort, .optimize = packed_optimize, + .optimize_required = packed_optimize_required, + .rename_ref = NULL, .copy_ref = NULL, diff --git a/refs/refs-internal.h b/refs/refs-internal.h index dee42f231dbd0a..c7d2a6e50b7696 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -424,6 +424,11 @@ typedef int ref_transaction_commit_fn(struct ref_store *refs, typedef int optimize_fn(struct ref_store *ref_store, struct refs_optimize_opts *opts); + +typedef int optimize_required_fn(struct ref_store *ref_store, + struct refs_optimize_opts *opts, + bool *required); + typedef int rename_ref_fn(struct ref_store *ref_store, const char *oldref, const char *newref, const char *logmsg); @@ -549,6 +554,7 @@ struct ref_storage_be { ref_transaction_abort_fn *transaction_abort; optimize_fn *optimize; + optimize_required_fn *optimize_required; rename_ref_fn *rename_ref; copy_ref_fn *copy_ref; diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index c23c45f3bf4639..a3ae0cf74a0588 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1733,6 +1733,29 @@ static int reftable_be_optimize(struct ref_store *ref_store, return ret; } +static int reftable_be_optimize_required(struct ref_store *ref_store, + struct refs_optimize_opts *opts, + bool *required) +{ + struct reftable_ref_store *refs = reftable_be_downcast(ref_store, REF_STORE_READ, + "optimize_refs_required"); + struct reftable_stack *stack; + bool use_heuristics = false; + + if (refs->err) + return refs->err; + + stack = refs->worktree_backend.stack; + if (!stack) + stack = refs->main_backend.stack; + + if (opts->flags & REFS_OPTIMIZE_AUTO) + use_heuristics = true; + + return reftable_stack_compaction_required(stack, use_heuristics, + required); +} + struct write_create_symref_arg { struct reftable_ref_store *refs; struct reftable_stack *stack; @@ -2756,6 +2779,8 @@ struct ref_storage_be refs_be_reftable = { .transaction_abort = reftable_be_transaction_abort, .optimize = reftable_be_optimize, + .optimize_required = reftable_be_optimize_required, + .rename_ref = reftable_be_rename_ref, .copy_ref = reftable_be_copy_ref, From 8c1ce2204cc755bdafec85aaa4ac9c5a686a8bf4 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Sat, 8 Nov 2025 22:51:56 +0100 Subject: [PATCH 054/553] maintenance: add checking logic in `pack_refs_condition()` The 'git-maintenance(1)' command supports an '--auto' flag. Usage of the flag ensures to run maintenance tasks only if certain thresholds are met. The heuristic is defined on a task level, wherein each task defines an 'auto_condition', which states if the task should be run. The 'pack-refs' task is hard-coded to return 1 as: 1. There was never a way to check if the reference backend needs to be optimized without actually performing the optimization. 2. We can pass in the '--auto' flag to 'git-pack-refs(1)' which would optimize based on heuristics. The previous commit added a `refs_optimize_required()` function, which can be used to check if a reference backend required optimization. Use this within `pack_refs_condition()`. This allows us to add a 'git maintenance is-needed' subcommand which can notify the user if maintenance is needed without actually performing the optimization. Without this change, the reference backend would always state that optimization is needed. Since we import 'revision.h', we need to remove the definition for 'SEEN' which is duplicated in the included header. Signed-off-by: Karthik Nayak Acked-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/gc.c | 30 +++++++++++++++++++++--------- object.h | 1 - 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index c6d62c74a7169f..85e9a38d103522 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -35,6 +35,7 @@ #include "path.h" #include "reflog.h" #include "rerere.h" +#include "revision.h" #include "blob.h" #include "tree.h" #include "promisor-remote.h" @@ -285,12 +286,26 @@ static void maintenance_run_opts_release(struct maintenance_run_opts *opts) static int pack_refs_condition(UNUSED struct gc_config *cfg) { - /* - * The auto-repacking logic for refs is handled by the ref backends and - * exposed via `git pack-refs --auto`. We thus always return truish - * here and let the backend decide for us. - */ - return 1; + struct string_list included_refs = STRING_LIST_INIT_NODUP; + struct ref_exclusions excludes = REF_EXCLUSIONS_INIT; + struct refs_optimize_opts optimize_opts = { + .exclusions = &excludes, + .includes = &included_refs, + .flags = REFS_OPTIMIZE_PRUNE | REFS_OPTIMIZE_AUTO, + }; + bool required; + + /* Check for all refs, similar to 'git refs optimize --all'. */ + string_list_append(optimize_opts.includes, "*"); + + if (refs_optimize_required(get_main_ref_store(the_repository), + &optimize_opts, &required)) + return 0; + + clear_ref_exclusions(&excludes); + string_list_clear(&included_refs, 0); + + return required; } static int maintenance_task_pack_refs(struct maintenance_run_opts *opts, @@ -1090,9 +1105,6 @@ static int maintenance_opt_schedule(const struct option *opt, const char *arg, return 0; } -/* Remember to update object flag allocation in object.h */ -#define SEEN (1u<<0) - struct cg_auto_data { int num_not_in_graph; int limit; diff --git a/object.h b/object.h index 1499f63d507c32..832299e763876c 100644 --- a/object.h +++ b/object.h @@ -79,7 +79,6 @@ void object_array_init(struct object_array *array); * list-objects-filter.c: 21 * bloom.c: 2122 * builtin/fsck.c: 0--3 - * builtin/gc.c: 0 * builtin/index-pack.c: 2021 * reflog.c: 10--12 * builtin/show-branch.c: 0-------------------------------------------26 From 28b83e6f08ae022d54d79e518e72933ae0930091 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Sat, 8 Nov 2025 22:51:57 +0100 Subject: [PATCH 055/553] maintenance: add 'is-needed' subcommand The 'git-maintenance(1)' command provides tooling to run maintenance tasks over Git repositories. The 'run' subcommand, as the name suggests, runs the maintenance tasks. When used with the '--auto' flag, it uses heuristics to determine if the required thresholds are met for running said maintenance tasks. There is however a lack of insight into these heuristics. Meaning, the checks are linked to the execution. Add a new 'is-needed' subcommand to 'git-maintenance(1)' which allows users to simply check if it is needed to run maintenance without performing it. This subcommand can check if it is needed to run maintenance without actually running it. Ideally it should be used with the '--auto' flag, which would allow users to check if the thresholds required are met. The subcommand also supports the '--task' flag which can be used to check specific maintenance tasks. While adding the respective tests in 't/t7900-maintenance.sh', remove a duplicate of the test: 'worktree-prune task with --auto honors maintenance.worktree-prune.auto'. Signed-off-by: Karthik Nayak Acked-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Documentation/git-maintenance.adoc | 13 ++++++ builtin/gc.c | 63 +++++++++++++++++++++++++++++- t/t7900-maintenance.sh | 54 +++++++++++++++++-------- 3 files changed, 113 insertions(+), 17 deletions(-) diff --git a/Documentation/git-maintenance.adoc b/Documentation/git-maintenance.adoc index 540b5cf68b0a1c..bda616f14c45d9 100644 --- a/Documentation/git-maintenance.adoc +++ b/Documentation/git-maintenance.adoc @@ -12,6 +12,7 @@ SYNOPSIS 'git maintenance' run [] 'git maintenance' start [--scheduler=] 'git maintenance' (stop|register|unregister) [] +'git maintenance' is-needed [] DESCRIPTION @@ -84,6 +85,16 @@ The `unregister` subcommand will report an error if the current repository is not already registered. Use the `--force` option to return success even when the current repository is not registered. +is-needed:: + Check whether maintenance needs to be run without actually running it. + Exits with a 0 status code if maintenance needs to be run, 1 otherwise. + Ideally used with the '--auto' flag. ++ +If one or more `--task` options are specified, then those tasks are checked +in that order. Otherwise, the tasks are determined by which +`maintenance..enabled` config options are true. By default, only +`maintenance.gc.enabled` is true. + TASKS ----- @@ -183,6 +194,8 @@ OPTIONS in the `gc.auto` config setting, or when the number of pack-files exceeds the `gc.autoPackLimit` config setting. Not compatible with the `--schedule` option. + When combined with the `is-needed` subcommand, check if the required + thresholds are met without actually running maintenance. --schedule:: When combined with the `run` subcommand, run maintenance tasks diff --git a/builtin/gc.c b/builtin/gc.c index 85e9a38d103522..928c805f02b493 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -3253,7 +3253,67 @@ static int maintenance_stop(int argc, const char **argv, const char *prefix, return update_background_schedule(NULL, 0); } -static const char * const builtin_maintenance_usage[] = { +static const char *const builtin_maintenance_is_needed_usage[] = { + "git maintenance is-needed [--task=] [--schedule]", + NULL +}; + +static int maintenance_is_needed(int argc, const char **argv, const char *prefix, + struct repository *repo UNUSED) +{ + struct maintenance_run_opts opts = MAINTENANCE_RUN_OPTS_INIT; + struct string_list selected_tasks = STRING_LIST_INIT_DUP; + struct gc_config cfg = GC_CONFIG_INIT; + struct option options[] = { + OPT_BOOL(0, "auto", &opts.auto_flag, + N_("run tasks based on the state of the repository")), + OPT_CALLBACK_F(0, "task", &selected_tasks, N_("task"), + N_("check a specific task"), + PARSE_OPT_NONEG, task_option_parse), + OPT_END() + }; + bool is_needed = false; + + argc = parse_options(argc, argv, prefix, options, + builtin_maintenance_is_needed_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + if (argc) + usage_with_options(builtin_maintenance_is_needed_usage, options); + + gc_config(&cfg); + initialize_task_config(&opts, &selected_tasks); + + if (opts.auto_flag) { + for (size_t i = 0; i < opts.tasks_nr; i++) { + if (tasks[opts.tasks[i]].auto_condition && + tasks[opts.tasks[i]].auto_condition(&cfg)) { + is_needed = true; + break; + } + } + } else { + /* + * When not using --auto we always require maintenance right now. + * + * TODO: this certainly is too eager, as some maintenance tasks may + * decide to not do anything because the data structures are already + * fully optimized. We may eventually want to extend the auto + * condition to also cover non-auto runs so that we can detect such + * cases. + */ + is_needed = true; + } + + string_list_clear(&selected_tasks, 0); + maintenance_run_opts_release(&opts); + gc_config_release(&cfg); + + if (is_needed) + return 0; + return 1; +} + +static const char *const builtin_maintenance_usage[] = { N_("git maintenance []"), NULL, }; @@ -3270,6 +3330,7 @@ int cmd_maintenance(int argc, OPT_SUBCOMMAND("stop", &fn, maintenance_stop), OPT_SUBCOMMAND("register", &fn, maintenance_register), OPT_SUBCOMMAND("unregister", &fn, maintenance_unregister), + OPT_SUBCOMMAND("is-needed", &fn, maintenance_is_needed), OPT_END(), }; diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index ddd273d8dc24fb..a17e2091c2e647 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -49,7 +49,9 @@ test_expect_success 'run [--auto|--quiet]' ' git maintenance run --auto 2>/dev/null && GIT_TRACE2_EVENT="$(pwd)/run-no-quiet.txt" \ git maintenance run --no-quiet 2>/dev/null && + git maintenance is-needed && test_subcommand git gc --quiet --no-detach --skip-foreground-tasks /dev/null && test_subcommand ! git prune-packed --quiet /dev/null && test_subcommand ! git prune-packed --quiet /dev/null && test_subcommand git prune-packed --quiet /dev/null && @@ -421,10 +435,13 @@ run_incremental_repack_and_verify () { test_commit A && git repack -adk && git multi-pack-index write && + ! git -c maintenance.incremental-repack.auto=1 \ + maintenance is-needed --auto --task=incremental-repack && GIT_TRACE2_EVENT="$(pwd)/midx-init.txt" git \ -c maintenance.incremental-repack.auto=1 \ maintenance run --auto --task=incremental-repack 2>/dev/null && test_subcommand ! git multi-pack-index write --no-progress /dev/null && test_subcommand ! git multi-pack-index write --no-progress /dev/null && @@ -485,9 +505,15 @@ test_expect_success 'reflog-expire task --auto only packs when exceeding limits' git reflog expire --all --expire=now && test_commit reflog-one && test_commit reflog-two && + + ! git -c maintenance.reflog-expire.auto=3 \ + maintenance is-needed --auto --task=reflog-expire && GIT_TRACE2_EVENT="$(pwd)/reflog-expire-auto.txt" \ git -c maintenance.reflog-expire.auto=3 maintenance run --auto --task=reflog-expire && test_subcommand ! git reflog expire --all .git/worktrees/abc && + git maintenance is-needed --auto --task=worktree-prune && test_expect_worktree_prune git maintenance run --auto --task=worktree-prune ' @@ -530,22 +557,7 @@ test_expect_success 'worktree-prune task with --auto honors maintenance.worktree test_expect_worktree_prune ! git -c maintenance.worktree-prune.auto=0 maintenance run --auto --task=worktree-prune && # A positive value should require at least this many prunable worktrees. test_expect_worktree_prune ! git -c maintenance.worktree-prune.auto=4 maintenance run --auto --task=worktree-prune && - test_expect_worktree_prune git -c maintenance.worktree-prune.auto=3 maintenance run --auto --task=worktree-prune -' - -test_expect_success 'worktree-prune task with --auto honors maintenance.worktree-prune.auto' ' - # A negative value should always prune. - test_expect_worktree_prune git -c maintenance.worktree-prune.auto=-1 maintenance run --auto --task=worktree-prune && - - mkdir .git/worktrees && - : >.git/worktrees/first && - : >.git/worktrees/second && - : >.git/worktrees/third && - - # Zero should never prune. - test_expect_worktree_prune ! git -c maintenance.worktree-prune.auto=0 maintenance run --auto --task=worktree-prune && - # A positive value should require at least this many prunable worktrees. - test_expect_worktree_prune ! git -c maintenance.worktree-prune.auto=4 maintenance run --auto --task=worktree-prune && + git -c maintenance.worktree-prune.auto=3 maintenance is-needed --auto --task=worktree-prune && test_expect_worktree_prune git -c maintenance.worktree-prune.auto=3 maintenance run --auto --task=worktree-prune ' @@ -554,11 +566,13 @@ test_expect_success 'worktree-prune task honors gc.worktreePruneExpire' ' rm -rf worktree && rm -f worktree-prune.txt && + ! git -c gc.worktreePruneExpire=1.week.ago maintenance is-needed --auto --task=worktree-prune && GIT_TRACE2_EVENT="$(pwd)/worktree-prune.txt" git -c gc.worktreePruneExpire=1.week.ago maintenance run --auto --task=worktree-prune && test_subcommand ! git worktree prune --expire 1.week.ago .git/rr-cache/entry && + git maintenance is-needed --auto --task=rerere-gc && test_expect_rerere_gc git maintenance run --auto --task=rerere-gc ' @@ -594,17 +611,22 @@ test_expect_success 'rerere-gc task with --auto honors maintenance.rerere-gc.aut test_when_finished "rm -rf .git/rr-cache" && # A negative value should always prune. + git -c maintenance.rerere-gc.auto=-1 maintenance is-needed --auto --task=rerere-gc && test_expect_rerere_gc git -c maintenance.rerere-gc.auto=-1 maintenance run --auto --task=rerere-gc && # A positive value prunes when there is at least one entry. + ! git -c maintenance.rerere-gc.auto=9000 maintenance is-needed --auto --task=rerere-gc && test_expect_rerere_gc ! git -c maintenance.rerere-gc.auto=9000 maintenance run --auto --task=rerere-gc && mkdir .git/rr-cache && + ! git -c maintenance.rerere-gc.auto=9000 maintenance is-needed --auto --task=rerere-gc && test_expect_rerere_gc ! git -c maintenance.rerere-gc.auto=9000 maintenance run --auto --task=rerere-gc && : >.git/rr-cache/entry-1 && + git -c maintenance.rerere-gc.auto=9000 maintenance is-needed --auto --task=rerere-gc && test_expect_rerere_gc git -c maintenance.rerere-gc.auto=9000 maintenance run --auto --task=rerere-gc && # Zero should never prune. : >.git/rr-cache/entry-1 && + ! git -c maintenance.rerere-gc.auto=0 maintenance is-needed --auto --task=rerere-gc && test_expect_rerere_gc ! git -c maintenance.rerere-gc.auto=0 maintenance run --auto --task=rerere-gc ' From fa052367ef8f7829996ff15368d63edfff0e40c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 9 Nov 2025 17:43:36 +0100 Subject: [PATCH 056/553] diff: disable rename detection with --quiet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Detecting renames and copies improves diff's output. This effort is wasted if we don't show any. Disable detection in that case. This actually fixes the error code when using the options --cached, --find-copies-harder, --no-ext-diff and --quiet together: run_diff_index() indirectly calls diff-lib.c::show_modified(), which queues even non-modified entries using diff_change() because we need them for copy detection. diff_change() sets flags.has_changes, though, which causes diff_can_quit_early() to declare we're done after seeing only the very first entry -- way too soon. Using --cached, --find-copies-harder and --quiet together without --no-ext-diff was not affected even before, as it causes the flag flags.diff_from_contents to be set, which disables the optimization in a different way. Reported-by: D. Ben Knoble Suggested-by: Phillip Wood Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- diff.c | 2 ++ t/t4007-rename-3.sh | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/diff.c b/diff.c index 90e8003dd11e4d..e4f8c0dc6c691b 100644 --- a/diff.c +++ b/diff.c @@ -4965,6 +4965,8 @@ void diff_setup_done(struct diff_options *options) if (options->flags.quick) { options->output_format = DIFF_FORMAT_NO_OUTPUT; options->flags.exit_with_status = 1; + options->detect_rename = 0; + options->flags.find_copies_harder = 0; } /* diff --git a/t/t4007-rename-3.sh b/t/t4007-rename-3.sh index e8faf0dd2ef1c5..3fc81bcd760081 100755 --- a/t/t4007-rename-3.sh +++ b/t/t4007-rename-3.sh @@ -41,6 +41,16 @@ test_expect_success 'copy detection, cached' ' compare_diff_raw current expected ' +test_expect_success 'exit code of quiet copy detection' ' + test_expect_code 1 \ + git diff --quiet --cached --find-copies-harder $tree +' + +test_expect_success 'exit code of quiet copy detection with --no-ext-diff' ' + test_expect_code 1 \ + git diff --quiet --cached --find-copies-harder --no-ext-diff $tree +' + # In the tree, there is only path0/COPYING. In the cache, path0 and # path1 both have COPYING and the latter is a copy of path0/COPYING. # However when we say we care only about path1, we should just see From 358e94dc7059500af09435112ef1d4e5f7692e52 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 11 Nov 2025 10:41:20 -0800 Subject: [PATCH 057/553] .gitattributes: remove misspelled no-op whitespace attribute Ever since 14f9e128 (Define the project whitespace policy, 2008-02-10) added the whitespace rules to .gitattributes, we spelled the most general rule like so: * whitespace=!indent,trail,space in the top-level .gitattributes file. The intent of this line was described in the commit log message: - Unless otherwise specified, indent with SP that could be replaced with HT are not "bad". But SP before HT in the indent is "bad", and trailing whitespaces are "bad". It clearly wanted to disable indent-with-non-tab, so !indent is most likely a misspelt form of '-indent'. Because indent-with-non-tab has never been enabled by default, by luck this was not causing any ill effect. We could either remove "!indent", or spell it "-indent". The immediate effect would be the same. It would only start to make a difference when/if we enable indent-with-non-tab by default in future versions of Git. Let's take the former option to remove "!indent" from the list. We would feel the effect first-hand ourselves before anybody else if we ever decide to change the built-in default whitespace rules, which would be hidden from us if we decide to rewrite it to "-indent" instead. Signed-off-by: Junio C Hamano --- .gitattributes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitattributes b/.gitattributes index 158c3d45c4c10c..2a50ebaf2ee149 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,4 +1,4 @@ -* whitespace=!indent,trail,space +* whitespace=trail,space *.[ch] whitespace=indent,trail,space diff=cpp *.sh whitespace=indent,trail,space text eol=lf *.perl text eol=lf diff=perl From 42ed0468663dd493c0a0e00edc83b668369157d6 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 11 Nov 2025 17:36:47 -0500 Subject: [PATCH 058/553] attr: avoid recursion when expanding attribute macros Given a set of attribute macros like: [attr]a1 a2 [attr]a2 a3 ... [attr]a300000 -text file a1 expanding the attributes for "file" requires expanding "a1" to "a2", "a2" to "a3", and so on until hitting a non-macro expansion ("-text", in this case). We implement this via recursion: fill_one() calls macroexpand_one(), which then recurses back to fill_one(). As a result, very deep macro chains like the one above can run out of stack space and cause us to segfault. The required stack space is fairly small; I needed on the order of 200,000 entries to get a segfault on Linux. So it's unlikely anybody would hit this accidentally, leaving only malicious inputs. There you can easily construct a repo which will segfault on clone (we look at attributes during the checkout step, but you'd see the same trying to do other operations, like diff in a bare repo). It's mostly harmless, since anybody constructing such a repo is only preventing victims from cloning their evil garbage, but it could be a nuisance for hosting sites. One option to prevent this is to limit the depth of recursion we'll allow. This is conceptually easy to implement, but it raises other questions: what should the limit be, and do we need a configuration knob for it? The recursion here is simple enough that we can avoid those questions by just converting it to iteration instead. Rather than iterate over the states of a match_attr in fill_one(), we'll put them all in a queue, and the expansion of each can add to the queue rather than recursing. Note that this is a LIFO queue in order to keep the same depth-first order we did with the recursive implementation. I've avoided using the word "stack" in the code because the term is already heavily used to refer to the stack of .gitattribute files that matches the tree structure of the repository. The test uses a limited stack size so we can trigger the problem with a much smaller input than the one shown above. The value here (3000) is enough to trigger the issue on my x86_64 Linux machine. Reported-by: Ben Stav Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- attr.c | 50 +++++++++++++++++++++++++++++-------------- t/t0003-attributes.sh | 20 +++++++++++++++++ 2 files changed, 54 insertions(+), 16 deletions(-) diff --git a/attr.c b/attr.c index d1daeb0b4d90a6..4999b7e09da930 100644 --- a/attr.c +++ b/attr.c @@ -1064,24 +1064,52 @@ static int path_matches(const char *pathname, int pathlen, pattern, prefix, pat->patternlen); } -static int macroexpand_one(struct all_attrs_item *all_attrs, int nr, int rem); +struct attr_state_queue { + const struct attr_state **items; + size_t alloc, nr; +}; + +static void attr_state_queue_push(struct attr_state_queue *t, + const struct match_attr *a) +{ + for (size_t i = 0; i < a->num_attr; i++) { + ALLOC_GROW(t->items, t->nr + 1, t->alloc); + t->items[t->nr++] = &a->state[i]; + } +} + +static const struct attr_state *attr_state_queue_pop(struct attr_state_queue *t) +{ + return t->nr ? t->items[--t->nr] : NULL; +} + +static void attr_state_queue_release(struct attr_state_queue *t) +{ + free(t->items); +} static int fill_one(struct all_attrs_item *all_attrs, const struct match_attr *a, int rem) { - size_t i; + struct attr_state_queue todo = { 0 }; + const struct attr_state *state; - for (i = a->num_attr; rem > 0 && i > 0; i--) { - const struct git_attr *attr = a->state[i - 1].attr; + attr_state_queue_push(&todo, a); + while (rem > 0 && (state = attr_state_queue_pop(&todo))) { + const struct git_attr *attr = state->attr; const char **n = &(all_attrs[attr->attr_nr].value); - const char *v = a->state[i - 1].setto; + const char *v = state->setto; if (*n == ATTR__UNKNOWN) { + const struct all_attrs_item *item = + &all_attrs[attr->attr_nr]; *n = v; rem--; - rem = macroexpand_one(all_attrs, attr->attr_nr, rem); + if (item->macro && item->value == ATTR__TRUE) + attr_state_queue_push(&todo, item->macro); } } + attr_state_queue_release(&todo); return rem; } @@ -1106,16 +1134,6 @@ static int fill(const char *path, int pathlen, int basename_offset, return rem; } -static int macroexpand_one(struct all_attrs_item *all_attrs, int nr, int rem) -{ - const struct all_attrs_item *item = &all_attrs[nr]; - - if (item->macro && item->value == ATTR__TRUE) - return fill_one(all_attrs, item->macro, rem); - else - return rem; -} - /* * Marks the attributes which are macros based on the attribute stack. * This prevents having to search through the attribute stack each time diff --git a/t/t0003-attributes.sh b/t/t0003-attributes.sh index 3c98b622f25b76..582e207aa12eb1 100755 --- a/t/t0003-attributes.sh +++ b/t/t0003-attributes.sh @@ -664,4 +664,24 @@ test_expect_success 'user defined builtin_objectmode values are ignored' ' test_cmp expect err ' +test_expect_success ULIMIT_STACK_SIZE 'deep macro recursion' ' + n=3000 && + { + i=0 && + while test $i -lt $n; do + echo "[attr]a$i a$((i+1))" && + i=$((i+1)) || + return 1 + done && + echo "[attr]a$n -text" && + echo "file a0" + } >.gitattributes && + { + echo "file: text: unset" && + test_seq -f "file: a%d: set" 0 $n + } >expect && + run_with_limited_stack git check-attr -a file >actual && + test_cmp expect actual +' + test_done From dee80940b123ad7006e0497391d8c160ae15ba1b Mon Sep 17 00:00:00 2001 From: Julia Evans Date: Wed, 12 Nov 2025 19:53:20 +0000 Subject: [PATCH 059/553] doc: add an explanation of Git's data model Git very often uses the terms "object", "reference", or "index" in its documentation. However, it's hard to find a clear explanation of these terms and how they relate to each other in the documentation. The closest candidates currently are: 1. `gitglossary`. This makes a good effort, but it's an alphabetically ordered dictionary and a dictionary is not a good way to learn concepts. You have to jump around too much and it's not possible to present the concepts in the order that they should be explained. 2. `gitcore-tutorial`. This explains how to use the "core" Git commands. This is a nice document to have, but it's not necessary to learn how `update-index` works to understand Git's data model, and we should not be requiring users to learn how to use the "plumbing" commands if they want to learn what the term "index" or "object" means. 3. `gitrepository-layout`. This is a great resource, but it includes a lot of information about configuration and internal implementation details which are not related to the data model. It also does not explain how commits work. The result of this is that Git users (even users who have been using Git for 15+ years) struggle to read the documentation because they don't know what the core terms mean, and it's not possible to add links to help them learn more. Add an explanation of Git's data model. Some choices I've made in deciding what "core data model" means: 1. Omit pseudorefs like `FETCH_HEAD`, because it's not clear to me if those are intended to be user facing or if they're more like internal implementation details. 2. Don't talk about submodules other than by mentioning how they relate to trees. This is because Git has a lot of special features, and explaining how they all work exhaustively could quickly go down a rabbit hole which would make this document less useful for understanding Git's core behaviour. 3. Don't discuss the structure of a commit message (first line, trailers etc). 4. Don't mention configuration. 5. Don't mention the `.git` directory, to avoid getting too much into implementation details Signed-off-by: Julia Evans Signed-off-by: Junio C Hamano --- Documentation/Makefile | 1 + Documentation/gitdatamodel.adoc | 307 ++++++++++++++++++++++++++++ Documentation/glossary-content.adoc | 4 +- Documentation/meson.build | 1 + 4 files changed, 311 insertions(+), 2 deletions(-) create mode 100644 Documentation/gitdatamodel.adoc diff --git a/Documentation/Makefile b/Documentation/Makefile index 6fb83d0c6ebf22..5f4acfacbdb6f0 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -52,6 +52,7 @@ MAN7_TXT += gitcli.adoc MAN7_TXT += gitcore-tutorial.adoc MAN7_TXT += gitcredentials.adoc MAN7_TXT += gitcvs-migration.adoc +MAN7_TXT += gitdatamodel.adoc MAN7_TXT += gitdiffcore.adoc MAN7_TXT += giteveryday.adoc MAN7_TXT += gitfaq.adoc diff --git a/Documentation/gitdatamodel.adoc b/Documentation/gitdatamodel.adoc new file mode 100644 index 00000000000000..3614f5960ea143 --- /dev/null +++ b/Documentation/gitdatamodel.adoc @@ -0,0 +1,307 @@ +gitdatamodel(7) +=============== + +NAME +---- +gitdatamodel - Git's core data model + +SYNOPSIS +-------- +gitdatamodel + +DESCRIPTION +----------- + +It's not necessary to understand Git's data model to use Git, but it's +very helpful when reading Git's documentation so that you know what it +means when the documentation says "object", "reference" or "index". + +Git's core operations use 4 kinds of data: + +1. <>: commits, trees, blobs, and tag objects +2. <>: branches, tags, + remote-tracking branches, etc +3. <>, also known as the staging area +4. <>: logs of changes to references ("ref log") + +[[objects]] +OBJECTS +------- + +All of the commits and files in a Git repository are stored as "Git objects". +Git objects never change after they're created, and every object has an ID, +like `1b61de420a21a2f1aaef93e38ecd0e45e8bc9f0a`. + +This means that if you have an object's ID, you can always recover its +exact contents as long as the object hasn't been deleted. + +Every object has: + +[[object-id]] +1. an *ID* (aka "object name"), which is a cryptographic hash of its + type and contents. + It's fast to look up a Git object using its ID. + This is usually represented in hexadecimal, like + `1b61de420a21a2f1aaef93e38ecd0e45e8bc9f0a`. +2. a *type*. There are 4 types of objects: + <>, <>, <>, + and <>. +3. *contents*. The structure of the contents depends on the type. + +Here's how each type of object is structured: + +[[commit]] +commit:: + A commit contains these required fields + (though there are other optional fields): ++ +1. The full directory structure of all the files in that version of the + repository and each file's contents, stored as the *<>* ID + of the commit's top-level directory +2. Its *parent commit ID(s)*. The first commit in a repository has 0 parents, + regular commits have 1 parent, merge commits have 2 or more parents +3. An *author* and the time the commit was authored +4. A *committer* and the time the commit was committed +5. A *commit message* ++ +Here's how an example commit is stored: ++ +---- +tree 1b61de420a21a2f1aaef93e38ecd0e45e8bc9f0a +parent 4ccb6d7b8869a86aae2e84c56523f8705b50c647 +author Maya 1759173425 -0400 +committer Maya 1759173425 -0400 + +Add README +---- ++ +Like all other objects, commits can never be changed after they're created. +For example, "amending" a commit with `git commit --amend` creates a new +commit with the same parent. ++ +Git does not store the diff for a commit: when you ask Git to show +the commit with linkgit:git-show[1], it calculates the diff from its +parent on the fly. + +[[tree]] +tree:: + A tree is how Git represents a directory. + It can contain files or other trees (which are subdirectories). + It lists, for each item in the tree: ++ +1. The *filename*, for example `hello.py` +2. The *file type*, which must be one of these five types: + - *regular file* + - *executable file* + - *symbolic link* + - *directory* + - *gitlink* (for use with submodules) +3. The <> with the contents of the file, directory, + or gitlink. ++ +For example, this is how a tree containing one directory (`src`) and one file +(`README.md`) is stored: ++ +---- +100644 blob 8728a858d9d21a8c78488c8b4e70e531b659141f README.md +040000 tree 89b1d2e0495f66d6929f4ff76ff1bb07fc41947d src +---- + +NOTE: In the output above, Git displays the file type of each tree entry +using a format that's loosely modelled on Unix file modes (`100644` is +"regular file", `100755` is "executable file", `120000` is "symbolic +link", `040000` is "directory", and `160000` is "gitlink"). It also +displays the object's type: `blob` for files and symlinks, `tree` for +directories, and `commit` for gitlinks. + +[[blob]] +blob:: + A blob object contains a file's contents. ++ +When you make a commit, Git stores the full contents of each file that +you changed as a blob. +For example, if you have a commit that changes 2 files in a repository +with 1000 files, that commit will create 2 new blobs, and use the +previous blob ID for the other 998 files. +This means that commits can use relatively little disk space even in a +very large repository. + +[[tag-object]] +tag object:: + Tag objects contain these required fields + (though there are other optional fields): ++ +1. The *ID* of the object it references +2. The *type* of the object it references +3. The *tagger* and tag date +4. A *tag message*, similar to a commit message + +Here's how an example tag object is stored: + +---- +object 750b4ead9c87ceb3ddb7a390e6c7074521797fb3 +type commit +tag v1.0.0 +tagger Maya 1759927359 -0400 + +Release version 1.0.0 +---- + +NOTE: All of the examples in this section were generated with +`git cat-file -p `. + +[[references]] +REFERENCES +---------- + +References are a way to give a name to a commit. +It's easier to remember "the changes I'm working on are on the `turtle` +branch" than "the changes are in commit bb69721404348e". +Git often uses "ref" as shorthand for "reference". + +References can either refer to: + +1. An object ID, usually a <> ID +2. Another reference. This is called a "symbolic reference" + +References are stored in a hierarchy, and Git handles references +differently based on where they are in the hierarchy. +Most references are under `refs/`. Here are the main types: + +[[branch]] +branches: `refs/heads/`:: + A branch refers to a commit ID. + That commit is the latest commit on the branch. ++ +To get the history of commits on a branch, Git will start at the commit +ID the branch references, and then look at the commit's parent(s), +the parent's parent, etc. + +[[tag]] +tags: `refs/tags/`:: + A tag refers to a commit ID, tag object ID, or other object ID. + There are two types of tags: + 1. "Annotated tags", which reference a <> ID + which contains a tag message + 2. "Lightweight tags", which reference a commit, blob, or tree ID + directly ++ +Even though branches and tags both refer to a commit ID, Git +treats them very differently. +Branches are expected to change over time: when you make a commit, Git +will update your <> to point to the new commit. +Tags are usually not changed after they're created. + +[[HEAD]] +HEAD: `HEAD`:: + `HEAD` is where Git stores your current <>, + if there is a current branch. `HEAD` can either be: ++ +1. A symbolic reference to your current branch, for example `ref: + refs/heads/main` if your current branch is `main`. +2. A direct reference to a commit ID. In this case there is no current branch. + This is called "detached HEAD state", see the DETACHED HEAD section + of linkgit:git-checkout[1] for more. + +[[remote-tracking-branch]] +remote-tracking branches: `refs/remotes//`:: + A remote-tracking branch refers to a commit ID. + It's how Git stores the last-known state of a branch in a remote + repository. `git fetch` updates remote-tracking branches. When + `git status` says "you're up to date with origin/main", it's looking at + this. ++ +`refs/remotes//HEAD` is a symbolic reference to the remote's +default branch. This is the branch that `git clone` checks out by default. + +[[other-refs]] +Other references:: + Git tools may create references anywhere under `refs/`. + For example, linkgit:git-stash[1], linkgit:git-bisect[1], + and linkgit:git-notes[1] all create their own references + in `refs/stash`, `refs/bisect`, etc. + Third-party Git tools may also create their own references. ++ +Git may also create references other than `HEAD` at the base of the +hierarchy, like `ORIG_HEAD`. + +NOTE: Git may delete objects that aren't "reachable" from any reference +or <>. +An object is "reachable" if we can find it by following tags to whatever +they tag, commits to their parents or trees, and trees to the trees or +blobs that they contain. +For example, if you amend a commit with `git commit --amend`, +there will no longer be a branch that points at the old commit. +The old commit is recorded in the current branch's <>, +so it is still "reachable", but when the reflog entry expires it may +become unreachable and get deleted. + +the old commit will usually not be reachable, so it may be deleted eventually. +Reachable objects will never be deleted. + +[[index]] +THE INDEX +--------- +The index, also known as the "staging area", is a list of files and +the contents of each file, stored as a <>. +You can add files to the index or update the contents of a file in the +index with linkgit:git-add[1]. This is called "staging" the file for commit. + +Unlike a <>, the index is a flat list of files. +When you commit, Git converts the list of files in the index to a +directory <> and uses that tree in the new <>. + +Each index entry has 4 fields: + +1. The *file type*, which must be one of: + - *regular file* + - *executable file* + - *symbolic link* + - *gitlink* (for use with submodules) +2. The *<>* ID of the file, + or (rarely) the *<>* ID of the submodule +3. The *stage number*, either 0, 1, 2, or 3. This is normally 0, but if + there's a merge conflict there can be multiple versions of the same + filename in the index. +4. The *file path*, for example `src/hello.py` + +It's extremely uncommon to look at the index directly: normally you'd +run `git status` to see a list of changes between the index and <>. +But you can use `git ls-files --stage` to see the index. +Here's the output of `git ls-files --stage` in a repository with 2 files: + +---- +100644 8728a858d9d21a8c78488c8b4e70e531b659141f 0 README.md +100644 665c637a360874ce43bf74018768a96d2d4d219a 0 src/hello.py +---- + +[[reflogs]] +REFLOGS +------- + +Every time a branch, remote-tracking branch, or HEAD is updated, Git +updates a log called a "reflog" for that <>. +This means that if you make a mistake and "lose" a commit, you can +generally recover the commit ID by running `git reflog `. + +A reflog is a list of log entries. Each entry has: + +1. The *commit ID* +2. *Timestamp* when the change was made +3. *Log message*, for example `pull: Fast-forward` + +Reflogs only log changes made in your local repository. +They are not shared with remotes. + +You can view a reflog with `git reflog `. +For example, here's the reflog for a `main` branch which has changed twice: + +---- +$ git reflog main --date=iso --no-decorate +750b4ea main@{2025-09-29 15:17:05 -0400}: commit: Add README +4ccb6d7 main@{2025-09-29 15:16:48 -0400}: commit (initial): Initial commit +---- + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Documentation/glossary-content.adoc b/Documentation/glossary-content.adoc index e423e4765b71b0..20ba121314b9a4 100644 --- a/Documentation/glossary-content.adoc +++ b/Documentation/glossary-content.adoc @@ -297,8 +297,8 @@ This commit is referred to as a "merge commit", or sometimes just a identified by its <>. The objects usually live in `$GIT_DIR/objects/`. -[[def_object_identifier]]object identifier (oid):: - Synonym for <>. +[[def_object_identifier]]object identifier, object ID, oid:: + Synonyms for <>. [[def_object_name]]object name:: The unique identifier of an <>. The diff --git a/Documentation/meson.build b/Documentation/meson.build index 44f94cdb7ba672..f3fcf4bc9196aa 100644 --- a/Documentation/meson.build +++ b/Documentation/meson.build @@ -192,6 +192,7 @@ manpages = { 'gitcore-tutorial.adoc' : 7, 'gitcredentials.adoc' : 7, 'gitcvs-migration.adoc' : 7, + 'gitdatamodel.adoc' : 7, 'gitdiffcore.adoc' : 7, 'giteveryday.adoc' : 7, 'gitfaq.adoc' : 7, From 8d4725e48ef29bd857e21e689411878b6eb4df92 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 12 Nov 2025 14:02:47 -0800 Subject: [PATCH 060/553] whitespace: correct bit assignment comments A comment in diff.c claimed that bits up to 12th (counting from 0th) are whitespace rules, and 13th thru 15th are for new/old/context, but it turns out it was miscounting. Correct them, and clarify where the whitespace rule bits come from in the comment. Extend bit assignment comments to cover bits used for color-moved, which weren't described. Also update the way these bit constants are defined to use (1 << N) notation, instead of octal constants, as it tends to make it easier to notice a breakage like this. Sprinkle a few blank lines between logically distinct groups of CPP macro definitions to make them easier to read. Signed-off-by: Junio C Hamano --- diff.c | 7 +++++-- diff.h | 6 +++--- ws.h | 25 ++++++++++++++----------- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/diff.c b/diff.c index a74e701806be52..74261b332af16c 100644 --- a/diff.c +++ b/diff.c @@ -801,16 +801,19 @@ enum diff_symbol { DIFF_SYMBOL_CONTEXT_MARKER, DIFF_SYMBOL_SEPARATOR }; + /* * Flags for content lines: - * 0..12 are whitespace rules - * 13-15 are WSEH_NEW | WSEH_OLD | WSEH_CONTEXT + * 0..11 are whitespace rules (see ws.h) + * 12..14 are WSEH_NEW | WSEH_CONTEXT | WSEH_OLD * 16 is marking if the line is blank at EOF + * 17..19 are used for color-moved. */ #define DIFF_SYMBOL_CONTENT_BLANK_LINE_EOF (1<<16) #define DIFF_SYMBOL_MOVED_LINE (1<<17) #define DIFF_SYMBOL_MOVED_LINE_ALT (1<<18) #define DIFF_SYMBOL_MOVED_LINE_UNINTERESTING (1<<19) + #define DIFF_SYMBOL_CONTENT_WS_MASK (WSEH_NEW | WSEH_OLD | WSEH_CONTEXT | WS_RULE_MASK) /* diff --git a/diff.h b/diff.h index 2fa256c3ef0079..cbd355cf50f68e 100644 --- a/diff.h +++ b/diff.h @@ -331,9 +331,9 @@ struct diff_options { int ita_invisible_in_index; /* white-space error highlighting */ -#define WSEH_NEW (1<<12) -#define WSEH_CONTEXT (1<<13) -#define WSEH_OLD (1<<14) +#define WSEH_NEW (1<<12) +#define WSEH_CONTEXT (1<<13) +#define WSEH_OLD (1<<14) unsigned ws_error_highlight; const char *prefix; int prefix_length; diff --git a/ws.h b/ws.h index 5ba676c5595db5..23708efb7322ed 100644 --- a/ws.h +++ b/ws.h @@ -7,19 +7,22 @@ struct strbuf; /* * whitespace rules. * used by both diff and apply - * last two digits are tab width + * last two octal-digits are tab width (we support only up to 63). */ -#define WS_BLANK_AT_EOL 0100 -#define WS_SPACE_BEFORE_TAB 0200 -#define WS_INDENT_WITH_NON_TAB 0400 -#define WS_CR_AT_EOL 01000 -#define WS_BLANK_AT_EOF 02000 -#define WS_TAB_IN_INDENT 04000 -#define WS_TRAILING_SPACE (WS_BLANK_AT_EOL|WS_BLANK_AT_EOF) +#define WS_BLANK_AT_EOL (1<<6) +#define WS_SPACE_BEFORE_TAB (1<<7) +#define WS_INDENT_WITH_NON_TAB (1<<8) +#define WS_CR_AT_EOL (1<<9) +#define WS_BLANK_AT_EOF (1<<10) +#define WS_TAB_IN_INDENT (1<<11) + +#define WS_TRAILING_SPACE (WS_BLANK_AT_EOL|WS_BLANK_AT_EOF) #define WS_DEFAULT_RULE (WS_TRAILING_SPACE|WS_SPACE_BEFORE_TAB|8) -#define WS_TAB_WIDTH_MASK 077 -/* All WS_* -- when extended, adapt diff.c emit_symbol */ -#define WS_RULE_MASK 07777 +#define WS_TAB_WIDTH_MASK ((1<<6)-1) + +/* All WS_* -- when extended, adapt constants defined after diff.c:diff_symbol */ +#define WS_RULE_MASK ((1<<12)-1) + extern unsigned whitespace_rule_cfg; unsigned whitespace_rule(struct index_state *, const char *); unsigned parse_whitespace_rule(const char *); From f83d1afafb8b772397aa3854184c42f7810fa0df Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 12 Nov 2025 14:02:48 -0800 Subject: [PATCH 061/553] diff: emit_line_ws_markup() if/else style fix Apply the simple rule: if you need {} in one arm of the if/else if/else... cascade, have {} in all of them. Signed-off-by: Junio C Hamano --- diff.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/diff.c b/diff.c index 74261b332af16c..9a24a0791ceaca 100644 --- a/diff.c +++ b/diff.c @@ -1327,14 +1327,14 @@ static void emit_line_ws_markup(struct diff_options *o, ws = NULL; } - if (!ws && !set_sign) + if (!ws && !set_sign) { emit_line_0(o, set, NULL, 0, reset, sign, line, len); - else if (!ws) { + } else if (!ws) { emit_line_0(o, set_sign, set, !!set_sign, reset, sign, line, len); - } else if (blank_at_eof) + } else if (blank_at_eof) { /* Blank line at EOF - paint '+' as well */ emit_line_0(o, ws, NULL, 0, reset, sign, line, len); - else { + } else { /* Emit just the prefix, then the rest. */ emit_line_0(o, set_sign ? set_sign : set, NULL, !!set_sign, reset, sign, "", 0); From fc7abcd9d5460b381701ef43b7f6dafa73962950 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 12 Nov 2025 14:02:49 -0800 Subject: [PATCH 062/553] diff: correct suppress_blank_empty hack The suppress-blank-empty feature abused the CONTEXT_INCOMPLETE symbol that was meant to be used only for "\ No newline at the end of file" code path. The intent of the feature was to turn a context line we receive from xdiff machinery (which always uses ' ' for context lines, even an empty one) and spit it out as a truly empty line. Perform such a conversion very locally at where a line from xdiff that begins with ' ' is handled for output; there are many checks before the control reaches such place that checks the first letter of the diff output line to see if it is a context line, and having to check for '\n' and treat it as a special case is error prone. In order to catch similar hacks in the future, make sure the code path that is meant for "\ No newline" case checks the first byte is indeed a backslash. Signed-off-by: Junio C Hamano --- diff.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/diff.c b/diff.c index 9a24a0791ceaca..b9ef8550cc859a 100644 --- a/diff.c +++ b/diff.c @@ -1321,6 +1321,11 @@ static void emit_line_ws_markup(struct diff_options *o, const char *ws = NULL; int sign = o->output_indicators[sign_index]; + if (diff_suppress_blank_empty && + sign_index == OUTPUT_INDICATOR_CONTEXT && + len == 1 && line[0] == '\n') + sign = 0; + if (o->ws_error_highlight & ws_rule) { ws = diff_get_color_opt(o, DIFF_WHITESPACE); if (!*ws) @@ -1498,15 +1503,9 @@ static void emit_diff_symbol_from_struct(struct diff_options *o, case DIFF_SYMBOL_WORDS: context = diff_get_color_opt(o, DIFF_CONTEXT); reset = diff_get_color_opt(o, DIFF_RESET); - /* - * Skip the prefix character, if any. With - * diff_suppress_blank_empty, there may be - * none. - */ - if (line[0] != '\n') { - line++; - len--; - } + + /* Skip the prefix character */ + line++; len--; emit_line(o, context, reset, line, len); break; case DIFF_SYMBOL_FILEPAIR_PLUS: @@ -2375,12 +2374,6 @@ static int fn_out_consume(void *priv, char *line, unsigned long len) ecbdata->label_path[0] = ecbdata->label_path[1] = NULL; } - if (diff_suppress_blank_empty - && len == 2 && line[0] == ' ' && line[1] == '\n') { - line[0] = '\n'; - len = 1; - } - if (line[0] == '@') { if (ecbdata->diff_words) diff_words_flush(ecbdata); @@ -2431,12 +2424,14 @@ static int fn_out_consume(void *priv, char *line, unsigned long len) ecbdata->lno_in_preimage++; emit_context_line(ecbdata, line + 1, len - 1); break; - default: + case '\\': /* incomplete line at the end */ ecbdata->lno_in_preimage++; emit_diff_symbol(o, DIFF_SYMBOL_CONTEXT_INCOMPLETE, line, len, 0); break; + default: + BUG("fn_out_consume: unknown line '%s'", line); } return 0; } From ced0561828271e8fc3fa2699754c5925969111b5 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 12 Nov 2025 14:02:50 -0800 Subject: [PATCH 063/553] diff: keep track of the type of the last line seen The "\ No newline at the end of the file" can come after any of the "-" (deleted preimage line), " " (unchanged line), or "+" (added postimage line). In later steps in this series, we will start treating a change that makes a file to end in an incomplete line as a whitespace error, and we would need to know what the previous line was when we react to "\ No newline" in the diff output. If the previous line was a context (i.e., unchanged) line, the file lacked the final newline before the change, and the change did not touch that line and left it still incomplete, so we do not want to warn in such a case. Teach fn_out_consume() function to keep track of what the previous line was, and prepare an otherwise empty switch statement to let us react differently to "\ No newline" based on that. Note that there is an existing curiosity (read: likely to be a bug) in the code that increments line number in the preimage file every time it sees a line with "\ No newline" on it, regardless of what the previous line was. I left it as-is, because it does not affect the main theme of this series, and more importantly, I do not think it matters, as these numbers are used only to compare them with blank_at_eof_in_{pre,post}image to issue a warning when we see more empty line was added at the end, but by definition, after we see "\ No newline at the end of the file" for an added line, we will not see an added line for the file. An independent audit to ensure that this curious increment can be safely removed would make a good #leftoverbits clean-up (we may even find some code that decrements this counter or over-increments the other quantity this counter is compared with that compensates the effect of this curious increment that hides a bug, in which case we may also need to remove them). Signed-off-by: Junio C Hamano --- diff.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/diff.c b/diff.c index b9ef8550cc859a..ff8fc91f88d30e 100644 --- a/diff.c +++ b/diff.c @@ -601,6 +601,7 @@ struct emit_callback { int blank_at_eof_in_postimage; int lno_in_preimage; int lno_in_postimage; + int last_line_kind; const char **label_path; struct diff_words_data *diff_words; struct diff_options *opt; @@ -2426,6 +2427,15 @@ static int fn_out_consume(void *priv, char *line, unsigned long len) break; case '\\': /* incomplete line at the end */ + switch (ecbdata->last_line_kind) { + case '+': + case '-': + case ' ': + break; + default: + BUG("fn_out_consume: '\\No newline' after unknown line (%c)", + ecbdata->last_line_kind); + } ecbdata->lno_in_preimage++; emit_diff_symbol(o, DIFF_SYMBOL_CONTEXT_INCOMPLETE, line, len, 0); @@ -2433,6 +2443,7 @@ static int fn_out_consume(void *priv, char *line, unsigned long len) default: BUG("fn_out_consume: unknown line '%s'", line); } + ecbdata->last_line_kind = line[0]; return 0; } From 29228cbdc5f8a80b1f61c7cc209ba8e3714cc38e Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 12 Nov 2025 14:02:51 -0800 Subject: [PATCH 064/553] diff: refactor output of incomplete line Create a helper function that reacts to "\ No newline at the end of file" in preparation for unifying the incomplete line handling in the code path that handles xdiff output and the code path that bypasses xdiff and produces a complete-rewrite patch. Currently the output from the DIFF_SYMBOL_CONTEXT_INCOMPLETE case still (ab)uses the same code as what is used for context lines, but that would change in a later step where we introduce support to treat an incomplete line as a whitespace error. Signed-off-by: Junio C Hamano --- diff.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/diff.c b/diff.c index ff8fc91f88d30e..7ee86204291161 100644 --- a/diff.c +++ b/diff.c @@ -1379,6 +1379,10 @@ static void emit_diff_symbol_from_struct(struct diff_options *o, emit_line(o, "", "", line, len); break; case DIFF_SYMBOL_CONTEXT_INCOMPLETE: + set = diff_get_color_opt(o, DIFF_CONTEXT); + reset = diff_get_color_opt(o, DIFF_RESET); + emit_line(o, set, reset, line, len); + break; case DIFF_SYMBOL_CONTEXT_MARKER: context = diff_get_color_opt(o, DIFF_CONTEXT); reset = diff_get_color_opt(o, DIFF_RESET); @@ -1668,6 +1672,13 @@ static void emit_context_line(struct emit_callback *ecbdata, emit_diff_symbol(ecbdata->opt, DIFF_SYMBOL_CONTEXT, line, len, flags); } +static void emit_incomplete_line_marker(struct emit_callback *ecbdata, + const char *line, int len) +{ + emit_diff_symbol(ecbdata->opt, DIFF_SYMBOL_CONTEXT_INCOMPLETE, + line, len, 0); +} + static void emit_hunk_header(struct emit_callback *ecbdata, const char *line, int len) { @@ -2437,8 +2448,7 @@ static int fn_out_consume(void *priv, char *line, unsigned long len) ecbdata->last_line_kind); } ecbdata->lno_in_preimage++; - emit_diff_symbol(o, DIFF_SYMBOL_CONTEXT_INCOMPLETE, - line, len, 0); + emit_incomplete_line_marker(ecbdata, line, len); break; default: BUG("fn_out_consume: unknown line '%s'", line); From 35925f1832ac00a4926623dff061a3b52123470b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 12 Nov 2025 14:02:52 -0800 Subject: [PATCH 065/553] diff: call emit_callback ecbdata everywhere Everybody else, except for emit_rewrite_lines(), calls the emit_callback data ecbdata. Make sure we call the same thing by the same name for consistency. Signed-off-by: Junio C Hamano --- diff.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/diff.c b/diff.c index 7ee86204291161..44b86544b75f97 100644 --- a/diff.c +++ b/diff.c @@ -1780,7 +1780,7 @@ static void add_line_count(struct strbuf *out, int count) } } -static void emit_rewrite_lines(struct emit_callback *ecb, +static void emit_rewrite_lines(struct emit_callback *ecbdata, int prefix, const char *data, int size) { const char *endp = NULL; @@ -1791,17 +1791,17 @@ static void emit_rewrite_lines(struct emit_callback *ecb, endp = memchr(data, '\n', size); len = endp ? (endp - data + 1) : size; if (prefix != '+') { - ecb->lno_in_preimage++; - emit_del_line(ecb, data, len); + ecbdata->lno_in_preimage++; + emit_del_line(ecbdata, data, len); } else { - ecb->lno_in_postimage++; - emit_add_line(ecb, data, len); + ecbdata->lno_in_postimage++; + emit_add_line(ecbdata, data, len); } size -= len; data += len; } if (!endp) - emit_diff_symbol(ecb->opt, DIFF_SYMBOL_NO_LF_EOF, NULL, 0, 0); + emit_diff_symbol(ecbdata->opt, DIFF_SYMBOL_NO_LF_EOF, NULL, 0, 0); } static void emit_rewrite_diff(const char *name_a, From 8d8e3c61874bbcf50d64aff34fb6c533458adf5e Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 12 Nov 2025 14:02:53 -0800 Subject: [PATCH 066/553] diff: update the way rewrite diff handles incomplete lines The diff_symbol based output framework uses one DIFF_SYMBOL_* enum value per the kind of output lines of "git diff", which corresponds to one output line from the xdiff machinery used internally. Most notably, DIFF_SYMBOL_PLUS and DIFF_SYMBOL_MINUS that correspond to "+" and "-" lines are designed to always take a complete line, even if the output from xdiff machinery may produce "\ No newline at the end of file" immediately after them. But this is not true in the rewrite-diff codepath, which completely bypasses the xdiff machinery. Since the code path feeds the bytes directly from the payload to the output routines, the output layer has to deal with an incomplete line with DIFF_SYMBOL_PLUS and DIFF_SYMBOL_MINUS, which never would see an incomplete line in the normal code paths. This lack of final newline is compensated by an ugly hack for a fabricated DIFF_SYMBOL_NO_LF_EOF token to inject an extra newline to the output to simulate output coming from the xdiff machinery. Revamp the way the complete-rewrite code path feeds the lines to the output layer by treating the last line of the pre/post image when it is an incomplete line specially. This lets us remove the DIFF_SYMBOL_NO_LF_EOF hack and use the usual DIFF_SYMBOL_CONTEXT_INCOMPLETE code path, which will later learn how to handle whitespace errors. Signed-off-by: Junio C Hamano --- diff.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/diff.c b/diff.c index 44b86544b75f97..5c606409bb9cbe 100644 --- a/diff.c +++ b/diff.c @@ -797,7 +797,6 @@ enum diff_symbol { DIFF_SYMBOL_CONTEXT_INCOMPLETE, DIFF_SYMBOL_PLUS, DIFF_SYMBOL_MINUS, - DIFF_SYMBOL_NO_LF_EOF, DIFF_SYMBOL_CONTEXT_FRAGINFO, DIFF_SYMBOL_CONTEXT_MARKER, DIFF_SYMBOL_SEPARATOR @@ -1352,7 +1351,6 @@ static void emit_line_ws_markup(struct diff_options *o, static void emit_diff_symbol_from_struct(struct diff_options *o, struct emitted_diff_symbol *eds) { - static const char *nneof = " No newline at end of file\n"; const char *context, *reset, *set, *set_sign, *meta, *fraginfo; enum diff_symbol s = eds->s; @@ -1361,13 +1359,6 @@ static void emit_diff_symbol_from_struct(struct diff_options *o, unsigned flags = eds->flags; switch (s) { - case DIFF_SYMBOL_NO_LF_EOF: - context = diff_get_color_opt(o, DIFF_CONTEXT); - reset = diff_get_color_opt(o, DIFF_RESET); - putc('\n', o->file); - emit_line_0(o, context, NULL, 0, reset, '\\', - nneof, strlen(nneof)); - break; case DIFF_SYMBOL_SUBMODULE_HEADER: case DIFF_SYMBOL_SUBMODULE_ERROR: case DIFF_SYMBOL_SUBMODULE_PIPETHROUGH: @@ -1786,22 +1777,38 @@ static void emit_rewrite_lines(struct emit_callback *ecbdata, const char *endp = NULL; while (0 < size) { - int len; + int len, plen; + char *pdata = NULL; endp = memchr(data, '\n', size); - len = endp ? (endp - data + 1) : size; + + if (endp) { + len = endp - data + 1; + plen = len; + } else { + len = size; + plen = len + 1; + pdata = xmalloc(plen + 2); + memcpy(pdata, data, len); + pdata[len] = '\n'; + pdata[len + 1] = '\0'; + } if (prefix != '+') { ecbdata->lno_in_preimage++; - emit_del_line(ecbdata, data, len); + emit_del_line(ecbdata, pdata ? pdata : data, plen); } else { ecbdata->lno_in_postimage++; - emit_add_line(ecbdata, data, len); + emit_add_line(ecbdata, pdata ? pdata : data, plen); } + free(pdata); size -= len; data += len; } - if (!endp) - emit_diff_symbol(ecbdata->opt, DIFF_SYMBOL_NO_LF_EOF, NULL, 0, 0); + if (!endp) { + static const char nneof[] = "\\ No newline at end of file\n"; + ecbdata->last_line_kind = prefix; + emit_incomplete_line_marker(ecbdata, nneof, sizeof(nneof) - 1); + } } static void emit_rewrite_diff(const char *name_a, From 3a4eb5ad2e9166255d5921196470710523f24ec4 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 12 Nov 2025 14:02:54 -0800 Subject: [PATCH 067/553] apply: revamp the parsing of incomplete lines A patch file represents the incomplete line at the end of the file with two lines, one that is the usual "context" with " " as the first letter, "added" with "+" as the first letter, or "removed" with "-" as the first letter that shows the content of the line, plus an extra "\ No newline at the end of file" line that comes immediately after it. Ever since the apply machinery was written, the "git apply" machinery parses "\ No newline at the end of file" line independently, without even knowing what line the incomplete-ness applies to, simply because it does not even remember what the previous line was. This poses a problem if we want to check and warn on an incomplete line. Revamp the code that parses a fragment, to actually drop the '\n' at the end of the incoming patch file that terminates a line, so that check_whitespace() calls made from the code path actually sees an incomplete as incomplete. Note that the result of this parsing is not directly used by the code path that applies the patch. apply_one_fragment() function already checks if each of the patch text it handles is followed by a line that begins with a backslash to drop the newline at the end of the current line it is looking at. In a sense, this patch harmonizes the behaviour of the parsing side to what is already done in the application side. Signed-off-by: Junio C Hamano --- apply.c | 70 ++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 21 deletions(-) diff --git a/apply.c b/apply.c index a2ceb3fb40d3b5..2b0f8bdab55463 100644 --- a/apply.c +++ b/apply.c @@ -1670,6 +1670,35 @@ static void check_old_for_crlf(struct patch *patch, const char *line, int len) } +/* + * Just saw a single line in a fragment. If it is a part of this hunk + * that is a context " ", an added "+", or a removed "-" line, it may + * be followed by "\\ No newline..." to signal that the last "\n" on + * this line needs to be dropped. Depending on locale settings when + * the patch was produced we don't know what this line would exactly + * say. The only thing we do know is that it begins with "\ ". + * Checking for 12 is just for sanity check; "\ No newline..." would + * be at least that long in any l10n. + * + * Return 0 if the line we saw is not followed by "\ No newline...", + * or length of that line. The caller will use it to skip over the + * "\ No newline..." line. + */ +static int adjust_incomplete(const char *line, int len, + unsigned long size) +{ + int nextlen; + + if (*line != '\n' && *line != ' ' && *line != '+' && *line != '-') + return 0; + if (size - len < 12 || memcmp(line + len, "\\ ", 2)) + return 0; + nextlen = linelen(line + len, size - len); + if (nextlen < 12) + return 0; + return nextlen; +} + /* * Parse a unified diff. Note that this really needs to parse each * fragment separately, since the only way to know the difference @@ -1684,6 +1713,7 @@ static int parse_fragment(struct apply_state *state, { int added, deleted; int len = linelen(line, size), offset; + int skip_len = 0; unsigned long oldlines, newlines; unsigned long leading, trailing; @@ -1710,6 +1740,22 @@ static int parse_fragment(struct apply_state *state, len = linelen(line, size); if (!len || line[len-1] != '\n') return -1; + + /* + * For an incomplete line, skip_len counts the bytes + * on "\\ No newline..." marker line that comes next + * to the current line. + * + * Reduce "len" to drop the newline at the end of + * line[], but add one to "skip_len", which will be + * added back to "len" for the next iteration, to + * compensate. + */ + skip_len = adjust_incomplete(line, len, size); + if (skip_len) { + len--; + skip_len++; + } switch (*line) { default: return -1; @@ -1745,20 +1791,10 @@ static int parse_fragment(struct apply_state *state, newlines--; trailing = 0; break; - - /* - * We allow "\ No newline at end of file". Depending - * on locale settings when the patch was produced we - * don't know what this line looks like. The only - * thing we do know is that it begins with "\ ". - * Checking for 12 is just for sanity check -- any - * l10n of "\ No newline..." is at least that long. - */ - case '\\': - if (len < 12 || memcmp(line, "\\ ", 2)) - return -1; - break; } + + /* eat the "\\ No newline..." as well, if exists */ + len += skip_len; } if (oldlines || newlines) return -1; @@ -1768,14 +1804,6 @@ static int parse_fragment(struct apply_state *state, fragment->leading = leading; fragment->trailing = trailing; - /* - * If a fragment ends with an incomplete line, we failed to include - * it in the above loop because we hit oldlines == newlines == 0 - * before seeing it. - */ - if (12 < size && !memcmp(line, "\\ ", 2)) - offset += linelen(line, size); - patch->lines_added += added; patch->lines_deleted += deleted; From a675104c399d242dd3ff5a0823fcd770563cf60f Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 12 Nov 2025 14:02:55 -0800 Subject: [PATCH 068/553] whitespace: allocate a few more bits and define WS_INCOMPLETE_LINE Reserve a few more bits in the diff flags word to be used for future whitespace rules. Add WS_INCOMPLETE_LINE without implementing the behaviour (yet). Signed-off-by: Junio C Hamano --- Documentation/config/core.adoc | 2 ++ diff.c | 16 ++++++++-------- diff.h | 6 +++--- ws.c | 6 ++++++ ws.h | 3 ++- 5 files changed, 21 insertions(+), 12 deletions(-) diff --git a/Documentation/config/core.adoc b/Documentation/config/core.adoc index e2de270c869c77..682fb595fb096c 100644 --- a/Documentation/config/core.adoc +++ b/Documentation/config/core.adoc @@ -626,6 +626,8 @@ core.whitespace:: part of the line terminator, i.e. with it, `trailing-space` does not trigger if the character before such a carriage-return is not a whitespace (not enabled by default). +* `incomplete-line` treats the last line of a file that is missing the + newline at the end as an error (not enabled by default). * `tabwidth=` tells how many character positions a tab occupies; this is relevant for `indent-with-non-tab` and when Git fixes `tab-in-indent` errors. The default tab width is 8. Allowed values are 1 to 63. diff --git a/diff.c b/diff.c index 5c606409bb9cbe..1b27b15f846333 100644 --- a/diff.c +++ b/diff.c @@ -804,15 +804,15 @@ enum diff_symbol { /* * Flags for content lines: - * 0..11 are whitespace rules (see ws.h) - * 12..14 are WSEH_NEW | WSEH_CONTEXT | WSEH_OLD - * 16 is marking if the line is blank at EOF - * 17..19 are used for color-moved. + * 0..15 are whitespace rules (see ws.h) + * 16..18 are WSEH_NEW | WSEH_CONTEXT | WSEH_OLD + * 19 is marking if the line is blank at EOF + * 20..22 are used for color-moved. */ -#define DIFF_SYMBOL_CONTENT_BLANK_LINE_EOF (1<<16) -#define DIFF_SYMBOL_MOVED_LINE (1<<17) -#define DIFF_SYMBOL_MOVED_LINE_ALT (1<<18) -#define DIFF_SYMBOL_MOVED_LINE_UNINTERESTING (1<<19) +#define DIFF_SYMBOL_CONTENT_BLANK_LINE_EOF (1<<19) +#define DIFF_SYMBOL_MOVED_LINE (1<<20) +#define DIFF_SYMBOL_MOVED_LINE_ALT (1<<21) +#define DIFF_SYMBOL_MOVED_LINE_UNINTERESTING (1<<22) #define DIFF_SYMBOL_CONTENT_WS_MASK (WSEH_NEW | WSEH_OLD | WSEH_CONTEXT | WS_RULE_MASK) diff --git a/diff.h b/diff.h index cbd355cf50f68e..422658407d4e86 100644 --- a/diff.h +++ b/diff.h @@ -331,9 +331,9 @@ struct diff_options { int ita_invisible_in_index; /* white-space error highlighting */ -#define WSEH_NEW (1<<12) -#define WSEH_CONTEXT (1<<13) -#define WSEH_OLD (1<<14) +#define WSEH_NEW (1<<16) +#define WSEH_CONTEXT (1<<17) +#define WSEH_OLD (1<<18) unsigned ws_error_highlight; const char *prefix; int prefix_length; diff --git a/ws.c b/ws.c index 70acee3337f241..34a7b4fad2840a 100644 --- a/ws.c +++ b/ws.c @@ -26,6 +26,7 @@ static struct whitespace_rule { { "blank-at-eol", WS_BLANK_AT_EOL, 0 }, { "blank-at-eof", WS_BLANK_AT_EOF, 0 }, { "tab-in-indent", WS_TAB_IN_INDENT, 0, 1 }, + { "incomplete-line", WS_INCOMPLETE_LINE, 0, 0 }, }; unsigned parse_whitespace_rule(const char *string) @@ -139,6 +140,11 @@ char *whitespace_error_string(unsigned ws) strbuf_addstr(&err, ", "); strbuf_addstr(&err, "tab in indent"); } + if (ws & WS_INCOMPLETE_LINE) { + if (err.len) + strbuf_addstr(&err, ", "); + strbuf_addstr(&err, "no newline at the end of file"); + } return strbuf_detach(&err, NULL); } diff --git a/ws.h b/ws.h index 23708efb7322ed..06d5cb73f8f88f 100644 --- a/ws.h +++ b/ws.h @@ -15,13 +15,14 @@ struct strbuf; #define WS_CR_AT_EOL (1<<9) #define WS_BLANK_AT_EOF (1<<10) #define WS_TAB_IN_INDENT (1<<11) +#define WS_INCOMPLETE_LINE (1<<12) #define WS_TRAILING_SPACE (WS_BLANK_AT_EOL|WS_BLANK_AT_EOF) #define WS_DEFAULT_RULE (WS_TRAILING_SPACE|WS_SPACE_BEFORE_TAB|8) #define WS_TAB_WIDTH_MASK ((1<<6)-1) /* All WS_* -- when extended, adapt constants defined after diff.c:diff_symbol */ -#define WS_RULE_MASK ((1<<12)-1) +#define WS_RULE_MASK ((1<<16)-1) extern unsigned whitespace_rule_cfg; unsigned whitespace_rule(struct index_state *, const char *); From 9fb15a8e1430b77e2cc771e425ce4f0954ce4777 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 12 Nov 2025 14:02:56 -0800 Subject: [PATCH 069/553] apply: check and fix incomplete lines The final line of a file that lacks the terminating newline at its end is called an incomplete line. In general they are frowned upon for many reasons (imagine concatenating two files with "cat A B" and what happens when A ends in an incomplete line, for example), and text-oriented tools often mishandle such a line. Implement checks in "git apply" for incomplete lines, which is off by default for backward compatibility's sake, so that "git apply --whitespace={fix,warn,error}" can notice, warn against, and fix them. As one of the new test shows, if you modify contents on an incomplete line in the original and leave the resulting line incomplete, it is still considered a whitespace error, the reasoning being that "you'd better fix it while at it if you are making a change on an incomplete line anyway", which may controversial. Signed-off-by: Junio C Hamano --- apply.c | 13 ++- t/t4124-apply-ws-rule.sh | 187 +++++++++++++++++++++++++++++++++++++++ ws.c | 14 +++ 3 files changed, 213 insertions(+), 1 deletion(-) diff --git a/apply.c b/apply.c index 2b0f8bdab55463..c9fb45247d8cd2 100644 --- a/apply.c +++ b/apply.c @@ -1640,6 +1640,14 @@ static void record_ws_error(struct apply_state *state, state->squelch_whitespace_errors < state->whitespace_error) return; + /* + * line[len] for an incomplete line points at the "\n" at the end + * of patch input line, so "%.*s" would drop the last letter on line; + * compensate for it. + */ + if (result & WS_INCOMPLETE_LINE) + len++; + err = whitespace_error_string(result); if (state->apply_verbosity > verbosity_silent) fprintf(stderr, "%s:%d: %s.\n%.*s\n", @@ -1794,7 +1802,10 @@ static int parse_fragment(struct apply_state *state, } /* eat the "\\ No newline..." as well, if exists */ - len += skip_len; + if (skip_len) { + len += skip_len; + state->linenr++; + } } if (oldlines || newlines) return -1; diff --git a/t/t4124-apply-ws-rule.sh b/t/t4124-apply-ws-rule.sh index 485c7d2d124ade..115a0f857906d4 100755 --- a/t/t4124-apply-ws-rule.sh +++ b/t/t4124-apply-ws-rule.sh @@ -556,4 +556,191 @@ test_expect_success 'whitespace check skipped for excluded paths' ' git apply --include=used --stat --whitespace=error sample-i && + (test_write_lines 1 2 3 0 5 && printf 6) >sample2-i && + cat sample-i >target && + git add target && + cat sample2-i >target && + git diff-files -p target >patch && + + cat sample-i >target && + git apply --whitespace=error target && + git apply --whitespace=error --check error && + test_cmp sample-i target && + test_must_be_empty error && + + cat sample2-i >target && + git apply --whitespace=error -R target && + git apply -R --whitespace=error --check error && + test_cmp sample2-i target && + test_must_be_empty error +' + +test_expect_success 'last line made incomplete (error)' ' + test_write_lines 1 2 3 4 5 6 >sample && + (test_write_lines 1 2 3 4 5 && printf 6) >sample-i && + cat sample >target && + git add target && + cat sample-i >target && + git diff-files -p target >patch && + + cat sample >target && + test_must_fail git apply --whitespace=error error && + test_grep "no newline" error && + + cat sample >target && + test_must_fail git apply --whitespace=error --check actual && + test_cmp sample target && + cat >expect <<-\EOF && + :10: no newline at the end of file. + 6 + error: 1 line adds whitespace errors. + EOF + test_cmp expect actual && + + cat sample-i >target && + git apply --whitespace=error -R target && + git apply --whitespace=error --check -R error && + test_cmp sample-i target && + test_must_be_empty error && + + cat sample >target && + git apply --whitespace=fix sample-i && + test_write_lines 1 2 3 4 5 6 >sample && + cat sample-i >target && + git add target && + cat sample >target && + git diff-files -p target >patch && + + cat sample-i >target && + git apply --whitespace=error target && + git apply --whitespace=error --check error && + test_cmp sample-i target && + test_must_be_empty error && + + cat sample >target && + test_must_fail git apply --whitespace=error -R error && + test_grep "no newline" error && + + cat sample >target && + test_must_fail git apply --whitespace=error --check -R actual && + test_cmp sample target && + cat >expect <<-\EOF && + :9: no newline at the end of file. + 6 + error: 1 line adds whitespace errors. + EOF + test_cmp expect actual && + + cat sample >target && + git apply --whitespace=fix -R sample-i && + test_write_lines 1 2 3 4 5 7 >sample3 && + cat sample-i >target && + git add target && + cat sample3 >target && + git diff-files -p target >patch && + + cat sample-i >target && + git apply --whitespace=error target && + git apply --whitespace=error --check error && + test_cmp sample-i target && + test_must_be_empty error && + + cat sample3 >target && + test_must_fail git apply --whitespace=error -R error && + test_grep "no newline" error && + + cat sample3 >target && + test_must_fail git apply --whitespace=error -R --check actual && + test_cmp sample3 target && + cat >expect <<-\EOF && + :9: no newline at the end of file. + 6 + error: 1 line adds whitespace errors. + EOF + test_cmp expect actual && + + cat sample3 >target && + git apply --whitespace=fix -R sample-i && + (test_write_lines 1 2 3 4 5 && printf 7) >sample3-i && + test_write_lines 1 2 3 4 5 6 >sample && + test_write_lines 1 2 3 4 5 7 >sample3 && + cat sample-i >target && + git add target && + cat sample3-i >target && + git diff-files -p target >patch && + + cat sample-i >target && + test_must_fail git apply --whitespace=error error && + test_grep "no newline" error && + + cat sample-i >target && + test_must_fail git apply --whitespace=error --check actual && + test_cmp sample-i target && + cat >expect <<-\EOF && + :11: no newline at the end of file. + 7 + error: 1 line adds whitespace errors. + EOF + test_cmp expect actual && + + cat sample3-i >target && + test_must_fail git apply --whitespace=error -R error && + test_grep "no newline" error && + + cat sample3-i >target && + test_must_fail git apply --whitespace=error --check -R actual && + test_cmp sample3-i target && + cat >expect <<-\EOF && + :9: no newline at the end of file. + 6 + error: 1 line adds whitespace errors. + EOF + test_cmp expect actual && + + cat sample-i >target && + git apply --whitespace=fix target && + git apply --whitespace=fix -R Date: Wed, 12 Nov 2025 14:02:57 -0800 Subject: [PATCH 070/553] diff: highlight and error out on incomplete lines Teach "git diff" to highlight "\ No newline at end of file" message as a whitespace error when incomplete-line whitespace error class is in effect. Thanks to the previous refactoring of complete rewrite code path, we can do this at a single place. Unlike whitespace errors in the payload where we need to annotate in line, possibly using colors, the line that has whitespace problems, we have a dedicated line already that can serve as the error message, so paint it as a whitespace error message. Also teach "git diff --check" to notice incomplete lines as whitespace errors and report when incomplete-line whitespace error class is in effect. Signed-off-by: Junio C Hamano --- diff.c | 29 +++++++++++++++-- t/t4015-diff-whitespace.sh | 67 +++++++++++++++++++++++++++++++++++--- 2 files changed, 90 insertions(+), 6 deletions(-) diff --git a/diff.c b/diff.c index 1b27b15f846333..7b7cd50dc24351 100644 --- a/diff.c +++ b/diff.c @@ -1370,7 +1370,11 @@ static void emit_diff_symbol_from_struct(struct diff_options *o, emit_line(o, "", "", line, len); break; case DIFF_SYMBOL_CONTEXT_INCOMPLETE: - set = diff_get_color_opt(o, DIFF_CONTEXT); + if ((flags & WS_INCOMPLETE_LINE) && + (flags & o->ws_error_highlight)) + set = diff_get_color_opt(o, DIFF_WHITESPACE); + else + set = diff_get_color_opt(o, DIFF_CONTEXT); reset = diff_get_color_opt(o, DIFF_RESET); emit_line(o, set, reset, line, len); break; @@ -1666,8 +1670,14 @@ static void emit_context_line(struct emit_callback *ecbdata, static void emit_incomplete_line_marker(struct emit_callback *ecbdata, const char *line, int len) { + int last_line_kind = ecbdata->last_line_kind; + unsigned flags = (last_line_kind == '+' + ? WSEH_NEW + : last_line_kind == '-' + ? WSEH_OLD + : WSEH_CONTEXT) | ecbdata->ws_rule; emit_diff_symbol(ecbdata->opt, DIFF_SYMBOL_CONTEXT_INCOMPLETE, - line, len, 0); + line, len, flags); } static void emit_hunk_header(struct emit_callback *ecbdata, @@ -3254,6 +3264,7 @@ struct checkdiff_t { struct diff_options *o; unsigned ws_rule; unsigned status; + int last_line_kind; }; static int is_conflict_marker(const char *line, int marker_size, unsigned long len) @@ -3292,6 +3303,7 @@ static void checkdiff_consume_hunk(void *priv, static int checkdiff_consume(void *priv, char *line, unsigned long len) { struct checkdiff_t *data = priv; + int last_line_kind; int marker_size = data->conflict_marker_size; const char *ws = diff_get_color(data->o->use_color, DIFF_WHITESPACE); const char *reset = diff_get_color(data->o->use_color, DIFF_RESET); @@ -3302,6 +3314,8 @@ static int checkdiff_consume(void *priv, char *line, unsigned long len) assert(data->o); line_prefix = diff_line_prefix(data->o); + last_line_kind = data->last_line_kind; + data->last_line_kind = line[0]; if (line[0] == '+') { unsigned bad; data->lineno++; @@ -3324,6 +3338,17 @@ static int checkdiff_consume(void *priv, char *line, unsigned long len) data->o->file, set, reset, ws); } else if (line[0] == ' ') { data->lineno++; + } else if (line[0] == '\\') { + /* no newline at the end of the line */ + if ((data->ws_rule & WS_INCOMPLETE_LINE) && + (last_line_kind == '+')) { + unsigned bad = WS_INCOMPLETE_LINE; + data->status |= bad; + err = whitespace_error_string(bad); + fprintf(data->o->file, "%s%s:%d: %s.\n", + line_prefix, data->filename, data->lineno, err); + free(err); + } } return 0; } diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh index 9de7f73f42b534..3c8eb02e4f3e64 100755 --- a/t/t4015-diff-whitespace.sh +++ b/t/t4015-diff-whitespace.sh @@ -43,6 +43,53 @@ do ' done +test_expect_success "incomplete line in both pre- and post-image context" ' + (echo foo && echo baz | tr -d "\012") >x && + git add x && + (echo bar && echo baz | tr -d "\012") >x && + git diff x && + git -c core.whitespace=incomplete diff --check x && + git diff -R x && + git -c core.whitespace=incomplete diff -R --check x +' + +test_expect_success "incomplete lines on both pre- and post-image" ' + # The interpretation taken here is "since you are touching + # the line anyway, you would better fix the incomplete line + # while you are at it." but this is debatable. + echo foo | tr -d "\012" >x && + git add x && + echo bar | tr -d "\012" >x && + git diff x && + test_must_fail git -c core.whitespace=incomplete diff --check x >error && + test_grep "no newline at the end of file" error && + git diff -R x && + test_must_fail git -c core.whitespace=incomplete diff -R --check x >error && + test_grep "no newline at the end of file" error +' + +test_expect_success "fix incomplete line in pre-image" ' + echo foo | tr -d "\012" >x && + git add x && + echo bar >x && + git diff x && + git -c core.whitespace=incomplete diff --check x && + git diff -R x && + test_must_fail git -c core.whitespace=incomplete diff -R --check x >error && + test_grep "no newline at the end of file" error +' + +test_expect_success "new incomplete line in post-image" ' + echo foo >x && + git add x && + echo bar | tr -d "\012" >x && + git diff x && + test_must_fail git -c core.whitespace=incomplete diff --check x >error && + test_grep "no newline at the end of file" error && + git diff -R x && + git -c core.whitespace=incomplete diff -R --check x +' + test_expect_success "Ray Lehtiniemi's example" ' cat <<-\EOF >x && do { @@ -1040,7 +1087,8 @@ test_expect_success 'ws-error-highlight test setup' ' { echo "0. blank-at-eol " && echo "1. still-blank-at-eol " && - echo "2. and a new line " + echo "2. and a new line " && + printf "3. and more" } >x && new_hash_x=$(git hash-object x) && after=$(git rev-parse --short "$new_hash_x") && @@ -1050,11 +1098,13 @@ test_expect_success 'ws-error-highlight test setup' ' index $before..$after 100644 --- a/x +++ b/x - @@ -1,2 +1,3 @@ + @@ -1,2 +1,4 @@ 0. blank-at-eol -1. blank-at-eol +1. still-blank-at-eol +2. and a new line + +3. and more + \ No newline at end of file EOF cat >expect.all <<-EOF && @@ -1062,11 +1112,13 @@ test_expect_success 'ws-error-highlight test setup' ' index $before..$after 100644 --- a/x +++ b/x - @@ -1,2 +1,3 @@ + @@ -1,2 +1,4 @@ 0. blank-at-eol -1. blank-at-eol +1. still-blank-at-eol +2. and a new line + +3. and more + \ No newline at end of file EOF cat >expect.none <<-EOF @@ -1074,16 +1126,19 @@ test_expect_success 'ws-error-highlight test setup' ' index $before..$after 100644 --- a/x +++ b/x - @@ -1,2 +1,3 @@ + @@ -1,2 +1,4 @@ 0. blank-at-eol -1. blank-at-eol +1. still-blank-at-eol +2. and a new line + +3. and more + \ No newline at end of file EOF ' test_expect_success 'test --ws-error-highlight option' ' + git config core.whitespace blank-at-eol,incomplete-line && git diff --color --ws-error-highlight=default,old >current.raw && test_decode_color current && @@ -1100,6 +1155,7 @@ test_expect_success 'test --ws-error-highlight option' ' ' test_expect_success 'test diff.wsErrorHighlight config' ' + git config core.whitespace blank-at-eol,incomplete-line && git -c diff.wsErrorHighlight=default,old diff --color >current.raw && test_decode_color current && @@ -1116,6 +1172,7 @@ test_expect_success 'test diff.wsErrorHighlight config' ' ' test_expect_success 'option overrides diff.wsErrorHighlight' ' + git config core.whitespace blank-at-eol,incomplete-line && git -c diff.wsErrorHighlight=none \ diff --color --ws-error-highlight=default,old >current.raw && @@ -1135,6 +1192,8 @@ test_expect_success 'option overrides diff.wsErrorHighlight' ' ' test_expect_success 'detect moved code, complete file' ' + git config core.whitespace blank-at-eol && + git reset --hard && cat <<-\EOF >test.c && #include From 51358a1ede7f4b6b50e4e5a86558af5204691fe0 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 12 Nov 2025 14:02:58 -0800 Subject: [PATCH 071/553] attr: enable incomplete-line whitespace error for this project Now "git diff --check" and "git apply --whitespace=warn/fix" learned incomplete line is a whitespace error, enable them for this project to prevent patches to add new incomplete lines to our source to both code and documentation files. Signed-off-by: Junio C Hamano --- .gitattributes | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitattributes b/.gitattributes index 32583149c2f927..a8e2950a735677 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,13 +1,13 @@ * whitespace=!indent,trail,space -*.[ch] whitespace=indent,trail,space diff=cpp -*.sh whitespace=indent,trail,space text eol=lf +*.[ch] whitespace=indent,trail,space,incomplete diff=cpp +*.sh whitespace=indent,trail,space,incomplete text eol=lf *.perl text eol=lf diff=perl *.pl text eof=lf diff=perl *.pm text eol=lf diff=perl *.py text eol=lf diff=python *.bat text eol=crlf CODE_OF_CONDUCT.md -whitespace -/Documentation/**/*.adoc text eol=lf +/Documentation/**/*.adoc text eol=lf whitespace=trail,space,incomplete /command-list.txt text eol=lf /GIT-VERSION-GEN text eol=lf /mergetools/* text eol=lf From 4580bcd2354aab9369164d936f7ccaa21fc98c98 Mon Sep 17 00:00:00 2001 From: Koji Nakamaru Date: Fri, 14 Nov 2025 06:04:30 +0000 Subject: [PATCH 072/553] osxkeychain: avoid incorrectly skipping store operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-credential-osxkeychain skips storing a credential if its "get" action sets "state[]=osxkeychain:seen=1". This behavior was introduced in e1ab45b2 (osxkeychain: state to skip unnecessary store operations, 2024-05-15), which appeared in v2.46. However, this state[] persists even if a credential returned by "git-credential-osxkeychain get" is invalid and a subsequent helper's "get" operation returns a valid credential. Another subsequent helper (such as [1]) may expect git-credential-osxkeychain to store the valid credential, but the "store" operation is incorrectly skipped because it only checks "state[]=osxkeychain:seen=1". To solve this issue, "state[]=osxkeychain:seen" needs to contain enough information to identify whether the current "store" input matches the output from the previous "get" operation (and not a credential from another helper). Set "state[]=osxkeychain:seen" to a value encoding the credential output by "get", and compare it with a value encoding the credential input by "store". [1]: https://github.com/hickford/git-credential-oauth Reported-by: Petter Sælen Helped-by: Junio C Hamano Helped-by: brian m. carlson Signed-off-by: Koji Nakamaru Signed-off-by: Junio C Hamano --- contrib/credential/osxkeychain/Makefile | 41 +++++- .../osxkeychain/git-credential-osxkeychain.c | 120 ++++++++++++++---- contrib/credential/osxkeychain/meson.build | 1 + 3 files changed, 132 insertions(+), 30 deletions(-) diff --git a/contrib/credential/osxkeychain/Makefile b/contrib/credential/osxkeychain/Makefile index 9680717abe44c6..c68445b82dc3e5 100644 --- a/contrib/credential/osxkeychain/Makefile +++ b/contrib/credential/osxkeychain/Makefile @@ -1,21 +1,55 @@ # The default target of this Makefile is... all:: git-credential-osxkeychain +include ../../../config.mak.uname -include ../../../config.mak.autogen -include ../../../config.mak +ifdef ZLIB_NG + BASIC_CFLAGS += -DHAVE_ZLIB_NG + ifdef ZLIB_NG_PATH + BASIC_CFLAGS += -I$(ZLIB_NG_PATH)/include + EXTLIBS += $(call libpath_template,$(ZLIB_NG_PATH)/$(lib)) + endif + EXTLIBS += -lz-ng +else + ifdef ZLIB_PATH + BASIC_CFLAGS += -I$(ZLIB_PATH)/include + EXTLIBS += $(call libpath_template,$(ZLIB_PATH)/$(lib)) + endif + EXTLIBS += -lz +endif +ifndef NO_ICONV + ifdef NEEDS_LIBICONV + ifdef ICONVDIR + BASIC_CFLAGS += -I$(ICONVDIR)/include + ICONV_LINK = $(call libpath_template,$(ICONVDIR)/$(lib)) + else + ICONV_LINK = + endif + ifdef NEEDS_LIBINTL_BEFORE_LIBICONV + ICONV_LINK += -lintl + endif + EXTLIBS += $(ICONV_LINK) -liconv + endif +endif +ifndef LIBC_CONTAINS_LIBINTL + EXTLIBS += -lintl +endif + prefix ?= /usr/local gitexecdir ?= $(prefix)/libexec/git-core CC ?= gcc -CFLAGS ?= -g -O2 -Wall +CFLAGS ?= -g -O2 -Wall -I../../.. $(BASIC_CFLAGS) +LDFLAGS ?= $(BASIC_LDFLAGS) $(EXTLIBS) INSTALL ?= install RM ?= rm -f %.o: %.c $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ -c $< -git-credential-osxkeychain: git-credential-osxkeychain.o +git-credential-osxkeychain: git-credential-osxkeychain.o ../../../libgit.a $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) \ -framework Security -framework CoreFoundation @@ -23,6 +57,9 @@ install: git-credential-osxkeychain $(INSTALL) -d -m 755 $(DESTDIR)$(gitexecdir) $(INSTALL) -m 755 $< $(DESTDIR)$(gitexecdir) +../../../libgit.a: + cd ../../..; make libgit.a + clean: $(RM) git-credential-osxkeychain git-credential-osxkeychain.o diff --git a/contrib/credential/osxkeychain/git-credential-osxkeychain.c b/contrib/credential/osxkeychain/git-credential-osxkeychain.c index 611c9798b3ae5c..b18026703418a9 100644 --- a/contrib/credential/osxkeychain/git-credential-osxkeychain.c +++ b/contrib/credential/osxkeychain/git-credential-osxkeychain.c @@ -2,6 +2,9 @@ #include #include #include +#include "git-compat-util.h" +#include "strbuf.h" +#include "wrapper.h" #define ENCODING kCFStringEncodingUTF8 static CFStringRef protocol; /* Stores constant strings - not memory managed */ @@ -12,7 +15,7 @@ static CFStringRef username; static CFDataRef password; static CFDataRef password_expiry_utc; static CFDataRef oauth_refresh_token; -static int state_seen; +static char *state_seen; static void clear_credential(void) { @@ -48,27 +51,6 @@ static void clear_credential(void) #define STRING_WITH_LENGTH(s) s, sizeof(s) - 1 -__attribute__((format (printf, 1, 2), __noreturn__)) -static void die(const char *err, ...) -{ - char msg[4096]; - va_list params; - va_start(params, err); - vsnprintf(msg, sizeof(msg), err, params); - fprintf(stderr, "%s\n", msg); - va_end(params); - clear_credential(); - exit(1); -} - -static void *xmalloc(size_t len) -{ - void *ret = malloc(len); - if (!ret) - die("Out of memory"); - return ret; -} - static CFDictionaryRef create_dictionary(CFAllocatorRef allocator, ...) { va_list args; @@ -112,6 +94,66 @@ static void write_item(const char *what, const char *buf, size_t len) putchar('\n'); } +static void write_item_strbuf(struct strbuf *sb, const char *what, const char *buf, int n) +{ + char s[32]; + + xsnprintf(s, sizeof(s), "__%s=", what); + strbuf_add(sb, s, strlen(s)); + strbuf_add(sb, buf, n); +} + +static void write_item_strbuf_cfstring(struct strbuf *sb, const char *what, CFStringRef ref) +{ + char *buf; + int len; + + if (!ref) + return; + len = CFStringGetMaximumSizeForEncoding(CFStringGetLength(ref), ENCODING) + 1; + buf = xmalloc(len); + if (CFStringGetCString(ref, buf, len, ENCODING)) + write_item_strbuf(sb, what, buf, strlen(buf)); + free(buf); +} + +static void write_item_strbuf_cfnumber(struct strbuf *sb, const char *what, CFNumberRef ref) +{ + short n; + char buf[32]; + + if (!ref) + return; + if (!CFNumberGetValue(ref, kCFNumberShortType, &n)) + return; + xsnprintf(buf, sizeof(buf), "%d", n); + write_item_strbuf(sb, what, buf, strlen(buf)); +} + +static void write_item_strbuf_cfdata(struct strbuf *sb, const char *what, CFDataRef ref) +{ + char *buf; + int len; + + if (!ref) + return; + buf = (char *)CFDataGetBytePtr(ref); + if (!buf || strlen(buf) == 0) + return; + len = CFDataGetLength(ref); + write_item_strbuf(sb, what, buf, len); +} + +static void encode_state_seen(struct strbuf *sb) +{ + strbuf_add(sb, "osxkeychain:seen=", strlen("osxkeychain:seen=")); + write_item_strbuf_cfstring(sb, "host", host); + write_item_strbuf_cfnumber(sb, "port", port); + write_item_strbuf_cfstring(sb, "path", path); + write_item_strbuf_cfstring(sb, "username", username); + write_item_strbuf_cfdata(sb, "password", password); +} + static void find_username_in_item(CFDictionaryRef item) { CFStringRef account_ref; @@ -124,6 +166,7 @@ static void find_username_in_item(CFDictionaryRef item) write_item("username", "", 0); return; } + username = CFStringCreateCopy(kCFAllocatorDefault, account_ref); username_buf = (char *)CFStringGetCStringPtr(account_ref, ENCODING); if (username_buf) @@ -163,6 +206,7 @@ static OSStatus find_internet_password(void) } data = CFDictionaryGetValue(item, kSecValueData); + password = CFDataCreateCopy(kCFAllocatorDefault, data); write_item("password", (const char *)CFDataGetBytePtr(data), @@ -173,7 +217,14 @@ static OSStatus find_internet_password(void) CFRelease(item); write_item("capability[]", "state", strlen("state")); - write_item("state[]", "osxkeychain:seen=1", strlen("osxkeychain:seen=1")); + { + struct strbuf sb; + + strbuf_init(&sb, 1024); + encode_state_seen(&sb); + write_item("state[]", sb.buf, strlen(sb.buf)); + strbuf_release(&sb); + } out: CFRelease(attrs); @@ -288,13 +339,22 @@ static OSStatus add_internet_password(void) CFDictionaryRef attrs; OSStatus result; - if (state_seen) - return errSecSuccess; - /* Only store complete credentials */ if (!protocol || !host || !username || !password) return -1; + if (state_seen) { + struct strbuf sb; + + strbuf_init(&sb, 1024); + encode_state_seen(&sb); + if (!strcmp(state_seen, sb.buf)) { + strbuf_release(&sb); + return errSecSuccess; + } + strbuf_release(&sb); + } + data = CFDataCreateMutableCopy(kCFAllocatorDefault, 0, password); if (password_expiry_utc) { CFDataAppendBytes(data, @@ -403,8 +463,9 @@ static void read_credential(void) (UInt8 *)v, strlen(v)); else if (!strcmp(buf, "state[]")) { - if (!strcmp(v, "osxkeychain:seen=1")) - state_seen = 1; + int len = strlen("osxkeychain:seen="); + if (!strncmp(v, "osxkeychain:seen=", len)) + state_seen = xstrdup(v); } /* * Ignore other lines; we don't know what they mean, but @@ -443,5 +504,8 @@ int main(int argc, const char **argv) clear_credential(); + if (state_seen) + free(state_seen); + return 0; } diff --git a/contrib/credential/osxkeychain/meson.build b/contrib/credential/osxkeychain/meson.build index 3c7677f736c684..ec91d0c14b0eb1 100644 --- a/contrib/credential/osxkeychain/meson.build +++ b/contrib/credential/osxkeychain/meson.build @@ -1,6 +1,7 @@ executable('git-credential-osxkeychain', sources: 'git-credential-osxkeychain.c', dependencies: [ + libgit, dependency('CoreFoundation'), dependency('Security'), ], From df90eccd931dfd8e6ecbc0c18c5037c85cc115dc Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Fri, 14 Nov 2025 15:04:47 +0100 Subject: [PATCH 073/553] doc: commit: link to git-status(1) on all format options MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `--branch` and `--long` refer to git-status(1) options but they don’t tell us what `short-format` and `long-format` are, respectively. And `--null` mentions “status” but does not link to the command. Refer to git-config(1) on `--branch` like `--short` does. `long-format` is the git-status(1) output. So we can just say that directly. Replace “status” with a `linkgit` on `--null`. Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- Documentation/git-commit.adoc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Documentation/git-commit.adoc b/Documentation/git-commit.adoc index 54c207ad45eaa2..8329c1034b9b30 100644 --- a/Documentation/git-commit.adoc +++ b/Documentation/git-commit.adoc @@ -146,7 +146,8 @@ See linkgit:git-rebase[1] for details. linkgit:git-status[1] for details. Implies `--dry-run`. `--branch`:: - Show the branch and tracking info even in short-format. + Show the branch and tracking info even in short-format. See + linkgit:git-status[1] for details. `--porcelain`:: When doing a dry-run, give the output in a porcelain-ready @@ -154,12 +155,13 @@ See linkgit:git-rebase[1] for details. `--dry-run`. `--long`:: - When doing a dry-run, give the output in the long-format. - Implies `--dry-run`. + When doing a dry-run, give the output in the long-format. This + is the default output of linkgit:git-status[1]. Implies + `--dry-run`. `-z`:: `--null`:: - When showing `short` or `porcelain` status output, print the + When showing `short` or `porcelain` linkgit:git-status[1] output, print the filename verbatim and terminate the entries with _NUL_, instead of _LF_. If no format is given, implies the `--porcelain` output format. Without the `-z` option, filenames with "unusual" characters are From 66c78e0653a4e60c625b8400da31da0ba5bd1286 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Sat, 15 Nov 2025 00:58:17 +0000 Subject: [PATCH 074/553] object-file: disallow adding submodules of different hash algo The design of the hash algorithm transition plan is that objects stored must be entirely in one algorithm since we lack any way to indicate a mix of algorithms. This also includes submodules, but we have traditionally not enforced this, which leads to various problems when trying to clone or check out the the submodule from the remote. Since this cannot work in the general case, restrict adding a submodule of a different algorithm to the index. Add tests for git add and git submodule add that these are rejected. Note that we cannot check this in git fsck because the malformed submodule is stored in the tree as an object ID which is either truncated (when a SHA-256 submodule is added to a SHA-1 repository) or padded with zeros (when a SHA-1 submodule is added to a SHA-256 repository). We cannot detect even the latter case because someone could have an actual submodule that actually ends in 24 zeros, which would be a false positive. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- object-file.c | 6 +++++- t/t3700-add.sh | 25 +++++++++++++++++++++++++ t/t7400-submodule-basic.sh | 25 +++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 1 deletion(-) diff --git a/object-file.c b/object-file.c index 2bc36ab3ee8cbf..6ff6cf75499d30 100644 --- a/object-file.c +++ b/object-file.c @@ -1296,7 +1296,11 @@ int index_path(struct index_state *istate, struct object_id *oid, strbuf_release(&sb); break; case S_IFDIR: - return repo_resolve_gitlink_ref(istate->repo, path, "HEAD", oid); + if (repo_resolve_gitlink_ref(istate->repo, path, "HEAD", oid)) + return -1; + if (&hash_algos[oid->algo] != istate->repo->hash_algo) + return error(_("cannot add a submodule of a different hash algorithm")); + break; default: return error(_("%s: unsupported file type"), path); } diff --git a/t/t3700-add.sh b/t/t3700-add.sh index df580a5806b4f1..9a2c8dbcc23d91 100755 --- a/t/t3700-add.sh +++ b/t/t3700-add.sh @@ -541,6 +541,31 @@ test_expect_success 'all statuses changed in folder if . is given' ' ) ' +test_expect_success 'cannot add a submodule of a different algorithm' ' + git init --object-format=sha256 sha256 && + ( + cd sha256 && + test_commit abc && + git init --object-format=sha1 submodule && + test_commit -C submodule def && + test_must_fail git add submodule 2>err && + test_grep "cannot add a submodule of a different hash algorithm" err && + git ls-files --stage >entries && + test_grep ! ^160000 entries + ) && + git init --object-format=sha1 sha1 && + ( + cd sha1 && + test_commit abc && + git init --object-format=sha256 submodule && + test_commit -C submodule def && + test_must_fail git add submodule 2>err && + test_grep "cannot add a submodule of a different hash algorithm" err && + git ls-files --stage >entries && + test_grep ! ^160000 entries + ) +' + test_expect_success CASE_INSENSITIVE_FS 'path is case-insensitive' ' path="$(pwd)/BLUB" && touch "$path" && diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh index fd3e7e355e4ffc..e6b551daad7e58 100755 --- a/t/t7400-submodule-basic.sh +++ b/t/t7400-submodule-basic.sh @@ -407,6 +407,31 @@ test_expect_success 'submodule add in subdirectory with relative path should fai test_grep toplevel output.err ' +test_expect_success 'submodule add of a different algorithm fails' ' + git init --object-format=sha256 sha256 && + ( + cd sha256 && + test_commit abc && + git init --object-format=sha1 submodule && + test_commit -C submodule def && + test_must_fail git submodule add "$submodurl" submodule 2>err && + test_grep "cannot add a submodule of a different hash algorithm" err && + git ls-files --stage >entries && + test_grep ! ^160000 entries + ) && + git init --object-format=sha1 sha1 && + ( + cd sha1 && + test_commit abc && + git init --object-format=sha256 submodule && + test_commit -C submodule def && + test_must_fail git submodule add "$submodurl" submodule 2>err && + test_grep "cannot add a submodule of a different hash algorithm" err && + git ls-files --stage >entries && + test_grep ! ^160000 entries + ) +' + test_expect_success 'setup - add an example entry to .gitmodules' ' git config --file=.gitmodules submodule.example.url git://example.com/init.git ' From 6fe288bfbcbbabc3d399dd71f876dccf71affff0 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 15 Nov 2025 00:58:18 +0000 Subject: [PATCH 075/553] read-cache: drop submodule check from add_to_cache() In add_to_cache(), we treat any directories as submodules, and complain if we can't resolve their HEAD. This call to resolve_gitlink_ref() was added by f937bc2f86 (add: error appropriately on repository with no commits, 2019-04-09), with the goal of improving the error message for empty repositories. But we already resolve the submodule HEAD in index_path(), which is where we find the actual oid we're going to use. Resolving it again here introduces some downsides: 1. It's more work, since we have to open up the submodule repository's files twice. 2. There are call paths that get to index_path() without going through add_to_cache(). For instance, we'd want a similar informative message if "git diff empty" finds that it can't resolve the submodule's HEAD. (In theory we can also get there through update-index, but AFAICT it refuses to consider directories as submodules at all, and just complains about them). 3. The resolution in index_path() catches more errors that we don't handle here. In particular, it will validate that the object format for the submodule matches that of the superproject. This isn't a bug, since our call in add_to_cache() throws away the oid it gets without looking at it. But it certainly caused confusion for me when looking at where the object-format check should go. So instead of resolving the submodule HEAD in add_to_cache(), let's just teach the call in index_path() to actually produce an error message (which it already does for other cases). That's probably what f937bc2f86 should have done in the first place, and it gives us a single point of resolution when adding a submodule to the index. The resulting output is slightly more verbose, as we propagate the error up the call stack, but I think that's OK (and again, matches many other errors we get when indexing fails). I've left the text of the error message as-is, though it is perhaps overly specific. There are many reasons that resolving the submodule HEAD might fail, though outside of corruption or system errors it is probably most likely that the submodule HEAD is simply on an unborn branch. Signed-off-by: Jeff King Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- object-file.c | 2 +- read-cache.c | 3 --- t/t3700-add.sh | 1 + 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/object-file.c b/object-file.c index 6ff6cf75499d30..bb0c77b45d89a5 100644 --- a/object-file.c +++ b/object-file.c @@ -1297,7 +1297,7 @@ int index_path(struct index_state *istate, struct object_id *oid, break; case S_IFDIR: if (repo_resolve_gitlink_ref(istate->repo, path, "HEAD", oid)) - return -1; + return error(_("'%s' does not have a commit checked out"), path); if (&hash_algos[oid->algo] != istate->repo->hash_algo) return error(_("cannot add a submodule of a different hash algorithm")); break; diff --git a/read-cache.c b/read-cache.c index 06ad74db2286ae..e34c5c56c637fd 100644 --- a/read-cache.c +++ b/read-cache.c @@ -707,7 +707,6 @@ int add_to_index(struct index_state *istate, const char *path, struct stat *st, int add_option = (ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE| (intent_only ? ADD_CACHE_NEW_ONLY : 0)); unsigned hash_flags = pretend ? 0 : INDEX_WRITE_OBJECT; - struct object_id oid; if (flags & ADD_CACHE_RENORMALIZE) hash_flags |= INDEX_RENORMALIZE; @@ -717,8 +716,6 @@ int add_to_index(struct index_state *istate, const char *path, struct stat *st, namelen = strlen(path); if (S_ISDIR(st_mode)) { - if (repo_resolve_gitlink_ref(the_repository, path, "HEAD", &oid) < 0) - return error(_("'%s' does not have a commit checked out"), path); while (namelen && path[namelen-1] == '/') namelen--; } diff --git a/t/t3700-add.sh b/t/t3700-add.sh index 9a2c8dbcc23d91..af93e53c12cfe3 100755 --- a/t/t3700-add.sh +++ b/t/t3700-add.sh @@ -388,6 +388,7 @@ test_expect_success 'error on a repository with no commits' ' test_must_fail git add empty >actual 2>&1 && cat >expect <<-EOF && error: '"'empty/'"' does not have a commit checked out + error: unable to index file '"'empty/'"' fatal: adding files failed EOF test_cmp expect actual From 878fef8ebf6cf513842de14284ee58f4d92fcef3 Mon Sep 17 00:00:00 2001 From: Jiang Xin Date: Sat, 15 Nov 2025 08:36:10 -0500 Subject: [PATCH 076/553] t/unit-tests: add UTF-8 width tests for CJK chars The file "builtin/repo.c" uses utf8_strwidth() to calculate the display width of UTF-8 characters in a table, but the resulting output is still misaligned. Add test cases for both utf8_strwidth and utf8_strnwidth to verify that they correctly compute the display width for UTF-8 characters. Also updated the build configuration in Makefile and meson.build to include the new test suite in the build process. Signed-off-by: Jiang Xin Signed-off-by: Junio C Hamano --- Makefile | 1 + t/meson.build | 1 + t/unit-tests/u-utf8-width.c | 97 +++++++++++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+) create mode 100644 t/unit-tests/u-utf8-width.c diff --git a/Makefile b/Makefile index 7e0f77e2988e3b..2a675461540c26 100644 --- a/Makefile +++ b/Makefile @@ -1525,6 +1525,7 @@ CLAR_TEST_SUITES += u-string-list CLAR_TEST_SUITES += u-strvec CLAR_TEST_SUITES += u-trailer CLAR_TEST_SUITES += u-urlmatch-normalization +CLAR_TEST_SUITES += u-utf8-width CLAR_TEST_PROG = $(UNIT_TEST_BIN)/unit-tests$(X) CLAR_TEST_OBJS = $(patsubst %,$(UNIT_TEST_DIR)/%.o,$(CLAR_TEST_SUITES)) CLAR_TEST_OBJS += $(UNIT_TEST_DIR)/clar/clar.o diff --git a/t/meson.build b/t/meson.build index a5531df415ffe2..dc43d69636d15c 100644 --- a/t/meson.build +++ b/t/meson.build @@ -24,6 +24,7 @@ clar_test_suites = [ 'unit-tests/u-strvec.c', 'unit-tests/u-trailer.c', 'unit-tests/u-urlmatch-normalization.c', + 'unit-tests/u-utf8-width.c', ] clar_sources = [ diff --git a/t/unit-tests/u-utf8-width.c b/t/unit-tests/u-utf8-width.c new file mode 100644 index 00000000000000..3766f19726ed9d --- /dev/null +++ b/t/unit-tests/u-utf8-width.c @@ -0,0 +1,97 @@ +#include "unit-test.h" +#include "utf8.h" +#include "strbuf.h" + +/* + * Test utf8_strnwidth with various Chinese strings + * Chinese characters typically have a width of 2 columns when displayed + */ +void test_utf8_width__strnwidth_chinese(void) +{ + const char *str; + + /* Test basic ASCII - each character should have width 1 */ + cl_assert_equal_i(5, utf8_strnwidth("Hello", 5, 0)); + /* skip_ansi = 1 */ + cl_assert_equal_i(5, utf8_strnwidth("Hello", 5, 1)); + + /* Test simple Chinese characters - each should have width 2 */ + /* "你好" is 6 bytes (3 bytes per char in UTF-8), 4 display columns */ + cl_assert_equal_i(4, utf8_strnwidth("你好", 6, 0)); + + /* Test mixed ASCII and Chinese - ASCII = 1 column, Chinese = 2 columns */ + /* "h"(1) + "i"(1) + "你"(2) + "好"(2) = 6 */ + cl_assert_equal_i(6, utf8_strnwidth("Hi你好", 8, 0)); + + /* Test longer Chinese string */ + /* 5 Chinese chars = 10 display columns */ + cl_assert_equal_i(10, utf8_strnwidth("你好世界!", 15, 0)); + + /* Test individual Chinese character width */ + cl_assert_equal_i(2, utf8_strnwidth("中", 3, 0)); + + /* Test empty string */ + cl_assert_equal_i(0, utf8_strnwidth("", 0, 0)); + + /* Test length limiting */ + str = "你好世界"; + /* Only first char "你"(2 columns) within 3 bytes */ + cl_assert_equal_i(2, utf8_strnwidth(str, 3, 0)); + /* First two chars "你好"(4 columns) in 6 bytes */ + cl_assert_equal_i(4, utf8_strnwidth(str, 6, 0)); +} + +/* + * Tests for utf8_strwidth (simpler version without length limit) + */ +void test_utf8_width__strwidth_chinese(void) +{ + /* Test basic ASCII */ + cl_assert_equal_i(5, utf8_strwidth("Hello")); + + /* Test Chinese characters */ + /* 2 Chinese chars = 4 display columns */ + cl_assert_equal_i(4, utf8_strwidth("你好")); + + /* Test longer Chinese string */ + /* 5 Chinese chars = 10 display columns */ + cl_assert_equal_i(10, utf8_strwidth("你好世界!")); + + /* Test mixed ASCII and Chinese */ + /* 5 ASCII (5 cols) + 2 Chinese (4 cols) = 9 */ + cl_assert_equal_i(9, utf8_strwidth("Hello世界")); + /* 2 ASCII (2 cols) + 2 Chinese (4 cols) + 1 ASCII (1 col) = 7 */ + cl_assert_equal_i(7, utf8_strwidth("Hi世界!")); +} + +/* + * Additional tests with other East Asian characters + */ +void test_utf8_width__strnwidth_japanese_korean(void) +{ + /* Japanese characters (should also be 2 columns each) */ + /* 5 Japanese chars x 2 cols each = 10 display columns */ + cl_assert_equal_i(10, utf8_strnwidth("こんにちは", 15, 0)); + + /* Korean characters (should also be 2 columns each) */ + /* 5 Korean chars x 2 cols each = 10 display columns */ + cl_assert_equal_i(10, utf8_strnwidth("안녕하세요", 15, 0)); +} + +/* + * Test utf8_strnwidth with CJK strings and ANSI sequences + */ +void test_utf8_width__strnwidth_cjk_with_ansi(void) +{ + /* Test CJK with ANSI sequences */ + const char *ansi_test = "\033[1m你好\033[0m"; + int width = utf8_strnwidth(ansi_test, strlen(ansi_test), 1); + /* Should skip ANSI sequences and count "你好" as 4 columns */ + cl_assert_equal_i(4, width); + + /* Test mixed ASCII, CJK, and ANSI */ + ansi_test = "Hello\033[32m世界\033[0m!"; + width = utf8_strnwidth(ansi_test, strlen(ansi_test), 1); + /* "Hello"(5) + "世界"(4) + "!"(1) = 10 */ + cl_assert_equal_i(10, width); +} From 7a03a10a3a746dd8565a3a0e6126f60523b41738 Mon Sep 17 00:00:00 2001 From: Jiang Xin Date: Sat, 15 Nov 2025 08:36:11 -0500 Subject: [PATCH 077/553] builtin/repo: fix table alignment for UTF-8 characters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The output table from "git repo structure" is misaligned when displaying UTF-8 characters (e.g., non-ASCII glyphs). E.g.: | 仓库结构 | 值 | | -------------- | ---- | | * 引用 | | | * 计数 | 67 | The previous implementation used simple width formatting with printf() which didn't properly handle multi-byte UTF-8 characters, causing misaligned table columns when displaying repository structure information. This change modifies the stats_table_print_structure function to use strbuf_utf8_align() instead of basic printf width specifiers. This ensures proper column alignment regardless of the character encoding of the content being displayed. Also add test cases for strbuf_utf8_align(), a function newly introduced in "builtin/repo.c". Signed-off-by: Jiang Xin Signed-off-by: Junio C Hamano --- builtin/repo.c | 21 +++++++++++++++++---- t/unit-tests/u-utf8-width.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/builtin/repo.c b/builtin/repo.c index 9d4749f79befa8..e3adb353a24ae8 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -292,14 +292,20 @@ static void stats_table_print_structure(const struct stats_table *table) int name_col_width = utf8_strwidth(name_col_title); int value_col_width = utf8_strwidth(value_col_title); struct string_list_item *item; + struct strbuf buf = STRBUF_INIT; if (table->name_col_width > name_col_width) name_col_width = table->name_col_width; if (table->value_col_width > value_col_width) value_col_width = table->value_col_width; - printf("| %-*s | %-*s |\n", name_col_width, name_col_title, - value_col_width, value_col_title); + strbuf_addstr(&buf, "| "); + strbuf_utf8_align(&buf, ALIGN_LEFT, name_col_width, name_col_title); + strbuf_addstr(&buf, " | "); + strbuf_utf8_align(&buf, ALIGN_LEFT, value_col_width, value_col_title); + strbuf_addstr(&buf, " |"); + printf("%s\n", buf.buf); + printf("| "); for (int i = 0; i < name_col_width; i++) putchar('-'); @@ -317,9 +323,16 @@ static void stats_table_print_structure(const struct stats_table *table) value = entry->value; } - printf("| %-*s | %*s |\n", name_col_width, item->string, - value_col_width, value); + strbuf_reset(&buf); + strbuf_addstr(&buf, "| "); + strbuf_utf8_align(&buf, ALIGN_LEFT, name_col_width, item->string); + strbuf_addstr(&buf, " | "); + strbuf_utf8_align(&buf, ALIGN_RIGHT, value_col_width, value); + strbuf_addstr(&buf, " |"); + printf("%s\n", buf.buf); } + + strbuf_release(&buf); } static void stats_table_clear(struct stats_table *table) diff --git a/t/unit-tests/u-utf8-width.c b/t/unit-tests/u-utf8-width.c index 3766f19726ed9d..86e09c3574a331 100644 --- a/t/unit-tests/u-utf8-width.c +++ b/t/unit-tests/u-utf8-width.c @@ -95,3 +95,40 @@ void test_utf8_width__strnwidth_cjk_with_ansi(void) /* "Hello"(5) + "世界"(4) + "!"(1) = 10 */ cl_assert_equal_i(10, width); } + +/* + * Test the strbuf_utf8_align function with CJK characters + */ +void test_utf8_width__strbuf_utf8_align(void) +{ + struct strbuf buf = STRBUF_INIT; + + /* Test left alignment with CJK */ + strbuf_utf8_align(&buf, ALIGN_LEFT, 10, "你好"); + /* Since "你好" is 4 display columns, we need 6 more spaces to reach 10 */ + cl_assert_equal_s("你好 ", buf.buf); + strbuf_reset(&buf); + + /* Test right alignment with CJK */ + strbuf_utf8_align(&buf, ALIGN_RIGHT, 8, "世界"); + /* "世界" is 4 display columns, so we need 4 leading spaces */ + cl_assert_equal_s(" 世界", buf.buf); + strbuf_reset(&buf); + + /* Test center alignment with CJK */ + strbuf_utf8_align(&buf, ALIGN_MIDDLE, 10, "中"); + /* "中" is 2 display columns, so (10-2)/2 = 4 spaces on left, 4 on right */ + cl_assert_equal_s(" 中 ", buf.buf); + strbuf_reset(&buf); + + strbuf_utf8_align(&buf, ALIGN_MIDDLE, 5, "中"); + /* "中" is 2 display columns, so (5-2)/2 = 1 spaces on left, 2 on right */ + cl_assert_equal_s(" 中 ", buf.buf); + strbuf_reset(&buf); + + /* Test alignment that is smaller than string width */ + strbuf_utf8_align(&buf, ALIGN_LEFT, 2, "你好"); + /* Since "你好" is 4 display columns, it should not be truncated */ + cl_assert_equal_s("你好", buf.buf); + strbuf_release(&buf); +} From 388517c14ce62e1c52b091af862bbaf28dbabb7a Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Mon, 17 Nov 2025 05:34:48 +0100 Subject: [PATCH 078/553] fast-import: refactor finalize_commit_buffer() In a following commit we are going to finalize commit buffers with or without signatures in order to check the signatures and possibly drop them. To do so easily and without duplication, let's refactor the current code that finalizes commit buffers into a new finalize_commit_buffer() function. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- builtin/fast-import.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 54d3e592c6e460..493de57ef67bfb 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -2815,6 +2815,18 @@ static void import_one_signature(struct signature_data *sig_sha1, die(_("parse_one_signature() returned unknown hash algo")); } +static void finalize_commit_buffer(struct strbuf *new_data, + struct signature_data *sig_sha1, + struct signature_data *sig_sha256, + struct strbuf *msg) +{ + add_gpgsig_to_commit(new_data, "gpgsig ", sig_sha1); + add_gpgsig_to_commit(new_data, "gpgsig-sha256 ", sig_sha256); + + strbuf_addch(new_data, '\n'); + strbuf_addbuf(new_data, msg); +} + static void parse_new_commit(const char *arg) { static struct strbuf msg = STRBUF_INIT; @@ -2950,11 +2962,8 @@ static void parse_new_commit(const char *arg) "encoding %s\n", encoding); - add_gpgsig_to_commit(&new_data, "gpgsig ", &sig_sha1); - add_gpgsig_to_commit(&new_data, "gpgsig-sha256 ", &sig_sha256); + finalize_commit_buffer(&new_data, &sig_sha1, &sig_sha256, &msg); - strbuf_addch(&new_data, '\n'); - strbuf_addbuf(&new_data, &msg); free(author); free(committer); free(encoding); From cb034c020aba54360e7c19faf82021399bf131e7 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Mon, 17 Nov 2025 05:34:49 +0100 Subject: [PATCH 079/553] commit: refactor verify_commit_buffer() In a following commit, we are going to check commit signatures, but we won't have a commit yet, only a commit buffer, and we are going to discard this commit buffer if the signature is invalid. So it would be wasteful to create a commit that we might discard, just to be able to check a commit signature. It would be simpler instead to be able to check commit signatures using only a commit buffer instead of a commit. To be able to do that, let's extract some code from the check_commit_signature() function into a new verify_commit_buffer() function, and then let's make check_commit_signature() call verify_commit_buffer(). Note that this doesn't fundamentally change how check_commit_signature() works. It used to call parse_signed_commit() which calls repo_get_commit_buffer(), parse_buffer_signed_by_header() and repo_unuse_commit_buffer(). Now these 3 functions are called directly by verify_commit_buffer(). Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- commit.c | 17 +++++++++++++++-- commit.h | 7 +++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/commit.c b/commit.c index 16d91b2bfcf291..709c9eed58a790 100644 --- a/commit.c +++ b/commit.c @@ -1315,7 +1315,8 @@ static void handle_signed_tag(const struct commit *parent, struct commit_extra_h free(buf); } -int check_commit_signature(const struct commit *commit, struct signature_check *sigc) +int verify_commit_buffer(const char *buffer, size_t size, + struct signature_check *sigc) { struct strbuf payload = STRBUF_INIT; struct strbuf signature = STRBUF_INIT; @@ -1323,7 +1324,8 @@ int check_commit_signature(const struct commit *commit, struct signature_check * sigc->result = 'N'; - if (parse_signed_commit(commit, &payload, &signature, the_hash_algo) <= 0) + if (parse_buffer_signed_by_header(buffer, size, &payload, + &signature, the_hash_algo) <= 0) goto out; sigc->payload_type = SIGNATURE_PAYLOAD_COMMIT; @@ -1337,6 +1339,17 @@ int check_commit_signature(const struct commit *commit, struct signature_check * return ret; } +int check_commit_signature(const struct commit *commit, struct signature_check *sigc) +{ + unsigned long size; + const char *buffer = repo_get_commit_buffer(the_repository, commit, &size); + int ret = verify_commit_buffer(buffer, size, sigc); + + repo_unuse_commit_buffer(the_repository, commit, buffer); + + return ret; +} + void verify_merge_signature(struct commit *commit, int verbosity, int check_trust) { diff --git a/commit.h b/commit.h index 1d6e0c7518b3bb..5406dd266327d4 100644 --- a/commit.h +++ b/commit.h @@ -333,6 +333,13 @@ int remove_signature(struct strbuf *buf); */ int check_commit_signature(const struct commit *commit, struct signature_check *sigc); +/* + * Same as check_commit_signature() but accepts a commit buffer and + * its size, instead of a `struct commit *`. + */ +int verify_commit_buffer(const char *buffer, size_t size, + struct signature_check *sigc); + /* record author-date for each commit object */ struct author_date_slab; void record_author_date(struct author_date_slab *author_date, From 881793c4f71c84e70af256c5721475c7c088b3f7 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Mon, 17 Nov 2025 08:04:31 +0000 Subject: [PATCH 080/553] xdiff: add 'minimal' to XDF_DIFF_ALGORITHM_MASK The XDF_DIFF_ALGORITHM_MASK bit mask only includes bits for the patience and histogram diffs, not for the minimal one. This means that when reseting the diff algorithm to the default one, one needs to separately clear the bit for the minimal diff. There are places in the code that fail to do that: merge-ort.c and builtin/merge-file.c. Add the XDF_NEED_MINIMAL bit to the bit mask, and remove the separate clearing of this bit in the places where it hasn't been forgotten. Signed-off-by: Antonin Delpeuch Signed-off-by: Junio C Hamano --- diff.c | 1 - merge-ort.c | 1 - xdiff/xdiff.h | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/diff.c b/diff.c index a74e701806be52..0eec4a2fe8ceee 100644 --- a/diff.c +++ b/diff.c @@ -3527,7 +3527,6 @@ static int set_diff_algorithm(struct diff_options *opts, return -1; /* clear out previous settings */ - DIFF_XDL_CLR(opts, NEED_MINIMAL); opts->xdl_opts &= ~XDF_DIFF_ALGORITHM_MASK; opts->xdl_opts |= value; diff --git a/merge-ort.c b/merge-ort.c index 29858074f9d8bf..23e2b64c79bf27 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -5496,7 +5496,6 @@ int parse_merge_opt(struct merge_options *opt, const char *s) if (value < 0) return -1; /* clear out previous settings */ - DIFF_XDL_CLR(opt, NEED_MINIMAL); opt->xdl_opts &= ~XDF_DIFF_ALGORITHM_MASK; opt->xdl_opts |= value; } diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h index 2cecde5afe5da1..dc370712e92860 100644 --- a/xdiff/xdiff.h +++ b/xdiff/xdiff.h @@ -43,7 +43,7 @@ extern "C" { #define XDF_PATIENCE_DIFF (1 << 14) #define XDF_HISTOGRAM_DIFF (1 << 15) -#define XDF_DIFF_ALGORITHM_MASK (XDF_PATIENCE_DIFF | XDF_HISTOGRAM_DIFF) +#define XDF_DIFF_ALGORITHM_MASK (XDF_PATIENCE_DIFF | XDF_HISTOGRAM_DIFF | XDF_NEED_MINIMAL) #define XDF_DIFF_ALG(x) ((x) & XDF_DIFF_ALGORITHM_MASK) #define XDF_INDENT_HEURISTIC (1 << 23) From ffffb987fcd3b3d6b88aceed87000ef4a5b6114e Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Mon, 17 Nov 2025 08:04:32 +0000 Subject: [PATCH 081/553] blame: make diff algorithm configurable The diff algorithm used in 'git-blame(1)' is set to 'myers', without the possibility to change it aside from the `--minimal` option. There has been long-standing interest in changing the default diff algorithm to "histogram", and Git 3.0 was floated as a possible occasion for taking some steps towards that: https://lore.kernel.org/git/xmqqed873vgn.fsf@gitster.g/ As a preparation for this move, it is worth making sure that the diff algorithm is configurable where useful. Make it configurable in the `git-blame(1)` command by introducing the `--diff-algorithm` option and make honor the `diff.algorithm` config variable. Keep Myers diff as the default. Signed-off-by: Antonin Delpeuch Signed-off-by: Junio C Hamano --- Documentation/diff-algorithm-option.adoc | 20 +++ Documentation/diff-options.adoc | 21 +-- Documentation/git-blame.adoc | 2 + builtin/blame.c | 52 +++++- t/meson.build | 1 + t/t8015-blame-diff-algorithm.sh | 203 +++++++++++++++++++++++ 6 files changed, 278 insertions(+), 21 deletions(-) create mode 100644 Documentation/diff-algorithm-option.adoc create mode 100755 t/t8015-blame-diff-algorithm.sh diff --git a/Documentation/diff-algorithm-option.adoc b/Documentation/diff-algorithm-option.adoc new file mode 100644 index 00000000000000..8e3a0b63d784d8 --- /dev/null +++ b/Documentation/diff-algorithm-option.adoc @@ -0,0 +1,20 @@ +`--diff-algorithm=(patience|minimal|histogram|myers)`:: + Choose a diff algorithm. The variants are as follows: ++ +-- + `default`;; + `myers`;; + The basic greedy diff algorithm. Currently, this is the default. + `minimal`;; + Spend extra time to make sure the smallest possible diff is + produced. + `patience`;; + Use "patience diff" algorithm when generating patches. + `histogram`;; + This algorithm extends the patience algorithm to "support + low-occurrence common elements". +-- ++ +For instance, if you configured the `diff.algorithm` variable to a +non-default value and want to use the default one, then you +have to use `--diff-algorithm=default` option. diff --git a/Documentation/diff-options.adoc b/Documentation/diff-options.adoc index ae31520f7f1d13..9cdad6f72a0c7d 100644 --- a/Documentation/diff-options.adoc +++ b/Documentation/diff-options.adoc @@ -197,26 +197,7 @@ and starts with __, this algorithm attempts to prevent it from appearing as a deletion or addition in the output. It uses the "patience diff" algorithm internally. -`--diff-algorithm=(patience|minimal|histogram|myers)`:: - Choose a diff algorithm. The variants are as follows: -+ --- - `default`;; - `myers`;; - The basic greedy diff algorithm. Currently, this is the default. - `minimal`;; - Spend extra time to make sure the smallest possible diff is - produced. - `patience`;; - Use "patience diff" algorithm when generating patches. - `histogram`;; - This algorithm extends the patience algorithm to "support - low-occurrence common elements". --- -+ -For instance, if you configured the `diff.algorithm` variable to a -non-default value and want to use the default one, then you -have to use `--diff-algorithm=default` option. +include::diff-algorithm-option.adoc[] `--stat[=[,[,]]]`:: Generate a diffstat. By default, as much space as necessary diff --git a/Documentation/git-blame.adoc b/Documentation/git-blame.adoc index e438d286258826..adcbb6f5dc97a3 100644 --- a/Documentation/git-blame.adoc +++ b/Documentation/git-blame.adoc @@ -85,6 +85,8 @@ include::blame-options.adoc[] Ignore whitespace when comparing the parent's version and the child's to find where the lines came from. +include::diff-algorithm-option.adoc[] + --abbrev=:: Instead of using the default 7+1 hexadecimal digits as the abbreviated object name, use +1 digits, where is at diff --git a/builtin/blame.c b/builtin/blame.c index 2703820258d5f9..27b513d27faddd 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -779,6 +779,19 @@ static int git_blame_config(const char *var, const char *value, } } + if (!strcmp(var, "diff.algorithm")) { + long diff_algorithm; + if (!value) + return config_error_nonbool(var); + diff_algorithm = parse_algorithm_value(value); + if (diff_algorithm < 0) + return error(_("unknown value for config '%s': %s"), + var, value); + xdl_opts &= ~XDF_DIFF_ALGORITHM_MASK; + xdl_opts |= diff_algorithm; + return 0; + } + if (git_diff_heuristic_config(var, value, cb) < 0) return -1; if (userdiff_config(var, value) < 0) @@ -824,6 +837,38 @@ static int blame_move_callback(const struct option *option, const char *arg, int return 0; } +static int blame_diff_algorithm_minimal(const struct option *option, + const char *arg, int unset) +{ + int *opt = option->value; + + BUG_ON_OPT_ARG(arg); + + *opt &= ~XDF_DIFF_ALGORITHM_MASK; + if (!unset) + *opt |= XDF_NEED_MINIMAL; + + return 0; +} + +static int blame_diff_algorithm_callback(const struct option *option, + const char *arg, int unset) +{ + int *opt = option->value; + long value = parse_algorithm_value(arg); + + BUG_ON_OPT_NEG(unset); + + if (value < 0) + return error(_("option diff-algorithm accepts \"myers\", " + "\"minimal\", \"patience\" and \"histogram\"")); + + *opt &= ~XDF_DIFF_ALGORITHM_MASK; + *opt |= value; + + return 0; +} + static int is_a_rev(const char *name) { struct object_id oid; @@ -915,11 +960,16 @@ int cmd_blame(int argc, OPT_BIT('s', NULL, &output_option, N_("suppress author name and timestamp (Default: off)"), OUTPUT_NO_AUTHOR), OPT_BIT('e', "show-email", &output_option, N_("show author email instead of name (Default: off)"), OUTPUT_SHOW_EMAIL), OPT_BIT('w', NULL, &xdl_opts, N_("ignore whitespace differences"), XDF_IGNORE_WHITESPACE), + OPT_CALLBACK_F(0, "diff-algorithm", &xdl_opts, N_(""), + N_("choose a diff algorithm"), + PARSE_OPT_NONEG, blame_diff_algorithm_callback), OPT_STRING_LIST(0, "ignore-rev", &ignore_rev_list, N_("rev"), N_("ignore when blaming")), OPT_STRING_LIST(0, "ignore-revs-file", &ignore_revs_file_list, N_("file"), N_("ignore revisions from ")), OPT_BIT(0, "color-lines", &output_option, N_("color redundant metadata from previous line differently"), OUTPUT_COLOR_LINE), OPT_BIT(0, "color-by-age", &output_option, N_("color lines by age"), OUTPUT_SHOW_AGE_WITH_COLOR), - OPT_BIT(0, "minimal", &xdl_opts, N_("spend extra cycles to find better match"), XDF_NEED_MINIMAL), + OPT_CALLBACK_F(0, "minimal", &xdl_opts, NULL, + N_("spend extra cycles to find a better match"), + PARSE_OPT_NOARG | PARSE_OPT_HIDDEN, blame_diff_algorithm_minimal), OPT_STRING('S', NULL, &revs_file, N_("file"), N_("use revisions from instead of calling git-rev-list")), OPT_STRING(0, "contents", &contents_from, N_("file"), N_("use 's contents as the final image")), OPT_CALLBACK_F('C', NULL, &opt, N_("score"), N_("find line copies within and across files"), PARSE_OPT_OPTARG, blame_copy_callback), diff --git a/t/meson.build b/t/meson.build index 401b24e50e0499..9f2fe7af8ba4c9 100644 --- a/t/meson.build +++ b/t/meson.build @@ -955,6 +955,7 @@ integration_tests = [ 't8012-blame-colors.sh', 't8013-blame-ignore-revs.sh', 't8014-blame-ignore-fuzzy.sh', + 't8015-blame-diff-algorithm.sh', 't8020-last-modified.sh', 't9001-send-email.sh', 't9002-column.sh', diff --git a/t/t8015-blame-diff-algorithm.sh b/t/t8015-blame-diff-algorithm.sh new file mode 100755 index 00000000000000..cd709536c6ecb1 --- /dev/null +++ b/t/t8015-blame-diff-algorithm.sh @@ -0,0 +1,203 @@ +#!/bin/sh + +test_description='git blame with specific diff algorithm' + +. ./test-lib.sh + +test_expect_success setup ' + cat >file.c <<-\EOF && + int f(int x, int y) + { + if (x == 0) + { + return y; + } + return x; + } + + int g(size_t u) + { + while (u < 30) + { + u++; + } + return u; + } + EOF + test_write_lines x x x x >file.txt && + git add file.c file.txt && + GIT_AUTHOR_NAME=Commit_1 git commit -m Commit_1 && + + cat >file.c <<-\EOF && + int g(size_t u) + { + while (u < 30) + { + u++; + } + return u; + } + + int h(int x, int y, int z) + { + if (z == 0) + { + return x; + } + return y; + } + EOF + test_write_lines x x x A B C D x E F G >file.txt && + git add file.c file.txt && + GIT_AUTHOR_NAME=Commit_2 git commit -m Commit_2 +' + +test_expect_success 'blame uses Myers diff algorithm by default' ' + cat >expected <<-\EOF && + Commit_2 int g(size_t u) + Commit_1 { + Commit_2 while (u < 30) + Commit_1 { + Commit_2 u++; + Commit_1 } + Commit_2 return u; + Commit_1 } + Commit_1 + Commit_2 int h(int x, int y, int z) + Commit_1 { + Commit_2 if (z == 0) + Commit_1 { + Commit_2 return x; + Commit_1 } + Commit_2 return y; + Commit_1 } + EOF + + git blame file.c >output && + sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output >without_varying_parts && + sed -e "s/ *$//g" without_varying_parts >actual && + test_cmp expected actual +' + +test_expect_success 'blame honors --diff-algorithm option' ' + cat >expected <<-\EOF && + Commit_1 int g(size_t u) + Commit_1 { + Commit_1 while (u < 30) + Commit_1 { + Commit_1 u++; + Commit_1 } + Commit_1 return u; + Commit_1 } + Commit_2 + Commit_2 int h(int x, int y, int z) + Commit_2 { + Commit_2 if (z == 0) + Commit_2 { + Commit_2 return x; + Commit_2 } + Commit_2 return y; + Commit_2 } + EOF + + git blame file.c --diff-algorithm histogram >output && + sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output >without_varying_parts && + sed -e "s/ *$//g" without_varying_parts >actual && + test_cmp expected actual +' + +test_expect_success 'blame honors diff.algorithm config variable' ' + cat >expected <<-\EOF && + Commit_1 int g(size_t u) + Commit_1 { + Commit_1 while (u < 30) + Commit_1 { + Commit_1 u++; + Commit_1 } + Commit_1 return u; + Commit_1 } + Commit_2 + Commit_2 int h(int x, int y, int z) + Commit_2 { + Commit_2 if (z == 0) + Commit_2 { + Commit_2 return x; + Commit_2 } + Commit_2 return y; + Commit_2 } + EOF + + git -c diff.algorithm=histogram blame file.c >output && + sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" \ + -e "s/ *$//g" output >actual && + test_cmp expected actual +' + +test_expect_success 'blame gives priority to --diff-algorithm over diff.algorithm' ' + cat >expected <<-\EOF && + Commit_1 int g(size_t u) + Commit_1 { + Commit_1 while (u < 30) + Commit_1 { + Commit_1 u++; + Commit_1 } + Commit_1 return u; + Commit_1 } + Commit_2 + Commit_2 int h(int x, int y, int z) + Commit_2 { + Commit_2 if (z == 0) + Commit_2 { + Commit_2 return x; + Commit_2 } + Commit_2 return y; + Commit_2 } + EOF + + git -c diff.algorithm=myers blame file.c --diff-algorithm histogram >output && + sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" \ + -e "s/ *$//g" output >actual && + test_cmp expected actual +' + +test_expect_success 'blame honors --minimal option' ' + cat >expected <<-\EOF && + Commit_1 x + Commit_1 x + Commit_1 x + Commit_2 A + Commit_2 B + Commit_2 C + Commit_2 D + Commit_1 x + Commit_2 E + Commit_2 F + Commit_2 G + EOF + + git blame file.txt --minimal >output && + sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output >actual && + test_cmp expected actual +' + +test_expect_success 'blame respects the order of diff options' ' + cat >expected <<-\EOF && + Commit_1 x + Commit_1 x + Commit_1 x + Commit_2 A + Commit_2 B + Commit_2 C + Commit_2 D + Commit_2 x + Commit_2 E + Commit_2 F + Commit_2 G + EOF + + git blame file.txt --minimal --diff-algorithm myers >output && + sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output >actual && + test_cmp expected actual +' + +test_done From f18aa68861538e93421699aa366d6691a85258b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 17 Nov 2025 20:42:55 +0100 Subject: [PATCH 082/553] wrapper: simplify xmkstemp() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Call xmkstemp_mode() instead of duplicating its error handling code. This switches the implementation from the system's mkstemp(3) to our own git_mkstemp_mode(), which works just as well. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- wrapper.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/wrapper.c b/wrapper.c index 2f00d2ac876c16..bfe7e30f0c80e8 100644 --- a/wrapper.c +++ b/wrapper.c @@ -421,24 +421,7 @@ FILE *fopen_or_warn(const char *path, const char *mode) int xmkstemp(char *filename_template) { - int fd; - char origtemplate[PATH_MAX]; - strlcpy(origtemplate, filename_template, sizeof(origtemplate)); - - fd = mkstemp(filename_template); - if (fd < 0) { - int saved_errno = errno; - const char *nonrelative_template; - - if (strlen(filename_template) != strlen(origtemplate)) - filename_template = origtemplate; - - nonrelative_template = absolute_path(filename_template); - errno = saved_errno; - die_errno("Unable to create temporary file '%s'", - nonrelative_template); - } - return fd; + return xmkstemp_mode(filename_template, 0600); } /* Adapted from libiberty's mkstemp.c. */ From c64eb849b14e5a78864f4260ccc12f46052020ec Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Mon, 17 Nov 2025 19:51:26 +0000 Subject: [PATCH 083/553] make strip: include `scalar` When Scalar was made a canonical part of Git in 7b5c93c6c68 (scalar: include in standard Git build & installation, 2022-09-02), it was added to all relevant Makefile targets except for the `strip` target. Let's correct that. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 70d1543b6b8688..fffb0d66e39842 100644 --- a/Makefile +++ b/Makefile @@ -2499,7 +2499,7 @@ please_set_SHELL_PATH_to_a_more_modern_shell: shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell -strip: $(PROGRAMS) git$X +strip: $(PROGRAMS) git$X scalar$X $(STRIP) $(STRIP_OPTS) $^ ### Target-specific flags and dependencies From ffe702b3edf85aa924d685a8603205d47e94e851 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Mon, 3 Nov 2025 18:01:46 +0000 Subject: [PATCH 084/553] t6429: update comment to mention correct tool A comment at the top of t6429 mentions why the test doesn't exercise git rebase or git cherry-pick. However, it claims that it uses `test-tool fast-rebase`. That was true when the comment was written, but commit f920b0289ba3 (replay: introduce new builtin, 2023-11-24) changed it to use git replay without updating this comment. We could potentially just strike this second comment, since git replay is a bona fide built-in, but perhaps the explanation about why it focuses on git replay is still useful. Update the comment to make it accurate again. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- t/t6429-merge-sequence-rename-caching.sh | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/t/t6429-merge-sequence-rename-caching.sh b/t/t6429-merge-sequence-rename-caching.sh index 0f39ed0d08a342..dcb734b10b3e2b 100755 --- a/t/t6429-merge-sequence-rename-caching.sh +++ b/t/t6429-merge-sequence-rename-caching.sh @@ -11,14 +11,13 @@ test_description="remember regular & dir renames in sequence of merges" # sure that we are triggering rename caching rather than rename # bypassing. # -# NOTE 2: this testfile uses 'test-tool fast-rebase' instead of either -# cherry-pick or rebase. sequencer.c is only superficially -# integrated with merge-ort; it calls merge_switch_to_result() -# after EACH merge, which updates the index and working copy AND -# throws away the cached results (because merge_switch_to_result() -# is only supposed to be called at the end of the sequence). -# Integrating them more deeply is a big task, so for now the tests -# use 'test-tool fast-rebase'. +# NOTE 2: this testfile uses replay instead of either cherry-pick or rebase. +# sequencer.c is only superficially integrated with merge-ort; it +# calls merge_switch_to_result() after EACH merge, which updates the +# index and working copy AND throws away the cached results (because +# merge_switch_to_result() is only supposed to be called at the end +# of the sequence). Integrating them more deeply is a big task, so +# for now the tests use 'git replay'. # From d5663a4b05640a44aa52a0cc32ba6a601d7c9149 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Mon, 3 Nov 2025 18:01:47 +0000 Subject: [PATCH 085/553] merge-ort: remove debugging crud While developing commit a16e8efe5c2b (merge-ort: fix merge.directoryRenames=false, 2025-03-13), I was testing things out and had an extra condition on one of the if-blocks that I occasionally swapped between '&& 0' and '&& 1' to see the effects of the changes. I forgot to remove it before submitting and it wasn't caught in review. Remove it now. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/merge-ort.c b/merge-ort.c index 29858074f9d8bf..23b55c5b929b22 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -3438,7 +3438,7 @@ static int collect_renames(struct merge_options *opt, continue; } if (opt->detect_directory_renames == MERGE_DIRECTORY_RENAMES_NONE && - p->status == 'R' && 1) { + p->status == 'R') { possibly_cache_new_pair(renames, p, side_index, NULL); goto skip_directory_renames; } From a562d90a350dcbddc8794809aef9608467022e34 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Mon, 3 Nov 2025 18:01:48 +0000 Subject: [PATCH 086/553] merge-ort: fix failing merges in special corner case At GitHub, we had a repository that was triggering git: merge-ort.c:3032: process_renames: Assertion `newinfo && !newinfo->merged.clean` failed. during git replay. This sounds similar to the somewhat recent f6ecb603ff8a (merge-ort: fix directory rename on top of source of other rename/delete, 2025-08-06), but the cause is different. Unlike that case, there are no rename-to-self situations arising in this case, and new to this case it can only be triggered during a replay operation on the 2nd or later commit being replayed, never on the first merge in the sequence. To trigger, the repository needs: * an upstream which: * renames a file to a different directory, e.g. old/file -> new/file * leaves other files remaining in the original directory (so that e.g. "old/" still exists upstream even though file has been removed from it and placed elsewhere) * a topic branch being rebased where: * a commit in the sequence: * modifies old/file * a subsequent commit in the sequence being replayed: * does NOT touch *anything* under new/ * does NOT touch old/file * DOES modify other paths under old/ * does NOT have any relevant renames that we need to detect _anywhere_ elsewhere in the tree (meaning this interacts interestingly with both directory renames and cached renames) In such a case, the assertion will trigger. The fix turns out to be surprisingly simple. I have a very vague recollection that I actually considered whether to add such an if-check years ago when I added the very similar one for oldinfo in 1b6b902d95a5 (merge-ort: process_renames() now needs more defensiveness, 2021-01-19), but I think I couldn't figure out a possible way to trigger it and was worried at the time that if I didn't know how to trigger it then I wasn't so sure that simply skipping it was correct. Waiting did give me a chance to put more thorough tests and checks into place for the rename-to-self cases a few months back, which I might not have found as easily otherwise. Anyway, put the check in place now and add a test that demonstrates the fix. Note that this bug, as demonstrated by the conditions listed above, runs at the intersection of relevant renames, trivial directory resolutions, and cached renames. All three of those optimizations are ones that unfortunately make the code (and testcases!) a bit more complex, and threading all three makes it a bit more so. However, the testcase isn't crazy enough that I'd expect no one to ever hit it in practice, and was confused why we didn't see it before. After some digging, I discovered that merge.directoryRenames=false is a workaround to this bug, and GitHub used that setting until recently (it was a "temporary" match-what-libgit2-does piece of code that lasted years longer than intended). Since the conditions I gave above for triggering this bug rule out the possibility of there being directory renames, one might assume that it shouldn't matter whether you try to detect such renames if there aren't any. However, due to commit a16e8efe5c2b (merge-ort: fix merge.directoryRenames=false, 2025-03-13), the heavy hammer used there means that merge.directoryRenames=false ALSO turns off rename caching, which is critical to triggering the bug. This becomes a bit more than an aside since... Re-reading that old commit, a16e8efe5c2b (merge-ort: fix merge.directoryRenames=false, 2025-03-13), it appears that the solution to this latest bug might have been at least a partial alternative solution to that old commit. And it may have been an improved alternative (or at least help implement one), since it may be able to avoid the heavy-handed disabling of rename cache. That might be an interesting future thing to investigate, but is not critical for the current fix. However, since I spent time digging it all up, at least leave a small comment tweak breadcrumb to help some future reader (myself or others) who wants to dig further to connect the dots a little quicker. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 29 ++++++++- t/t6429-merge-sequence-rename-caching.sh | 78 ++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 1 deletion(-) diff --git a/merge-ort.c b/merge-ort.c index 23b55c5b929b22..a1f3333e44a1ef 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -2912,6 +2912,32 @@ static int process_renames(struct merge_options *opt, if (!oldinfo || oldinfo->merged.clean) continue; + /* + * Rename caching from a previous commit might give us an + * irrelevant rename for the current commit. + * + * Imagine: + * foo/A -> bar/A + * was a cached rename for the upstream side from the + * previous commit (without the directories being renamed), + * but the next commit being replayed + * * does NOT add or delete files + * * does NOT have directory renames + * * does NOT modify any files under bar/ + * * does NOT modify foo/A + * * DOES modify other files under foo/ (otherwise the + * !oldinfo check above would have already exited for + * us) + * In such a case, our trivial directory resolution will + * have already merged bar/, and our attempt to process + * the cached + * foo/A -> bar/A + * would be counterproductive, and lack the necessary + * information anyway. Skip such renames. + */ + if (!newinfo) + continue; + /* * diff_filepairs have copies of pathnames, thus we have to * use standard 'strcmp()' (negated) instead of '=='. @@ -5118,7 +5144,8 @@ static void merge_check_renames_reusable(struct merge_options *opt, * optimization" comment near that case). * * This could be revisited in the future; see the commit message - * where this comment was added for some possible pointers. + * where this comment was added for some possible pointers, or the + * later commit where this comment was added. */ if (opt->detect_directory_renames == MERGE_DIRECTORY_RENAMES_NONE) { renames->cached_pairs_valid_side = 0; /* neither side valid */ diff --git a/t/t6429-merge-sequence-rename-caching.sh b/t/t6429-merge-sequence-rename-caching.sh index dcb734b10b3e2b..15dd2d94b75f0a 100755 --- a/t/t6429-merge-sequence-rename-caching.sh +++ b/t/t6429-merge-sequence-rename-caching.sh @@ -768,4 +768,82 @@ test_expect_success 'avoid assuming we detected renames' ' ) ' +# +# In the following testcase: +# Base: olddir/{valuesX_1, valuesY_1, valuesZ_1} +# other/content +# Upstream: rename olddir/valuesX_1 -> newdir/valuesX_2 +# Topic_1: modify olddir/valuesX_1 -> olddir/valuesX_3 +# Topic_2: modify olddir/valuesY, +# modify other/content +# Expected Pick1: olddir/{valuesY, valuesZ}, newdir/valuesX, other/content +# Expected Pick2: olddir/{valuesY, valuesZ}, newdir/valuesX, other/content +# +# This testcase presents no problems for git traditionally, but the fact that +# olddir/valuesX -> newdir/valuesX +# gets cached after the first pick presents a problem for the second commit to +# be replayed, because it appears to be an irrelevant rename, so the trivial +# directory resolution will resolve newdir/ without recursing into it, giving +# us no way to apply the cached rename to anything. +# +test_expect_success 'rename a file, use it on first pick, but irrelevant on second' ' + git init rename_a_file_use_it_once_irrelevant_on_second && + ( + cd rename_a_file_use_it_once_irrelevant_on_second && + + mkdir olddir/ other/ && + test_seq 3 8 >olddir/valuesX && + test_seq 3 8 >olddir/valuesY && + test_seq 3 8 >olddir/valuesZ && + printf "%s\n" A B C D E F G >other/content && + git add olddir other && + git commit -m orig && + + git branch upstream && + git branch topic && + + git switch upstream && + test_seq 1 8 >olddir/valuesX && + git add olddir && + mkdir newdir && + git mv olddir/valuesX newdir && + git commit -m "Renamed (and modified) olddir/valuesX into newdir/" && + + git switch topic && + + test_seq 3 10 >olddir/valuesX && + git add olddir && + git commit -m A && + + test_seq 1 8 >olddir/valuesY && + printf "%s\n" A B C D E F G H I >other/content && + git add olddir/valuesY other && + git commit -m B && + + # + # Actual testing; mostly we want to verify that we do not hit + # git: merge-ort.c:3032: process_renames: Assertion `newinfo && !newinfo->merged.clean` failed. + # + + git switch upstream && + git config merge.directoryRenames true && + + git replay --onto HEAD upstream~1..topic >out && + + # + # ...but we may as well check that the replay gave us a reasonable result + # + + git update-ref --stdin tracked && + test_line_count = 4 tracked && + test_path_is_file newdir/valuesX && + test_path_is_file olddir/valuesY && + test_path_is_file olddir/valuesZ && + test_path_is_file other/content + ) +' + test_done From d22a488482092da64ad19fda82edde199bed2466 Mon Sep 17 00:00:00 2001 From: David Macek Date: Mon, 17 Nov 2025 20:39:44 +0000 Subject: [PATCH 087/553] wincred: avoid memory corruption `wcsncpy_s()` wants to write the terminating null character so we need to allocate one more space for it in the target memory block. This should fix crashes when trying to read passwords. When this happened, the password/token wouldn't print out and Git would therefore ask for a new password every time. Signed-off-by: David Macek Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- contrib/credential/wincred/git-credential-wincred.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/credential/wincred/git-credential-wincred.c b/contrib/credential/wincred/git-credential-wincred.c index 5683846b4b4d1f..73c2b9b72ab53e 100644 --- a/contrib/credential/wincred/git-credential-wincred.c +++ b/contrib/credential/wincred/git-credential-wincred.c @@ -165,7 +165,7 @@ static void get_credential(void) write_item("username", creds[i]->UserName, creds[i]->UserName ? wcslen(creds[i]->UserName) : 0); if (creds[i]->CredentialBlobSize > 0) { - secret = xmalloc(creds[i]->CredentialBlobSize); + secret = xmalloc(creds[i]->CredentialBlobSize + sizeof(WCHAR)); wcsncpy_s(secret, creds[i]->CredentialBlobSize, (LPCWSTR)creds[i]->CredentialBlob, creds[i]->CredentialBlobSize / sizeof(WCHAR)); line = wcstok_s(secret, L"\r\n", &remaining_lines); write_item("password", line, line ? wcslen(line) : 0); From b0d5c88cca3d67fd020d1e71fb04380cd15f5e55 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Mon, 17 Nov 2025 20:40:08 +0000 Subject: [PATCH 088/553] cmake: stop trying to build the reftable and xdiff libraries In the `en/make-libgit-a` topic branch, more precisely in the commits f3b4c89d59f1 (make: delete REFTABLE_LIB, add reftable to LIB_OBJS, 2025-10-02) and cf680cdb9543 (make: delete XDIFF_LIB, add xdiff to LIB_OBJS, 2025-10-02), the strategy to build three static libraries was rethought, and instead only one static library is now built. This is good. However, the CMake definition was not changed accordingly, and now CMake-based builds fail thusly: [...] Generating hook-list.h CMake Error at CMakeLists.txt:122 (string): string sub-command REPLACE requires at least four arguments. Call Stack (most recent call first): CMakeLists.txt:711 (parse_makefile_for_sources) CMake Error at CMakeLists.txt:122 (string): string sub-command REPLACE requires at least four arguments. Call Stack (most recent call first): CMakeLists.txt:717 (parse_makefile_for_sources) -- Configuring incomplete, errors occurred! Fix that by removing the parts that expect the reftable and xdiff objects to be defined separately in the Makefile, still. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- contrib/buildsystems/CMakeLists.txt | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index edb0fc04ad7649..479163ab5cd3b5 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -679,18 +679,6 @@ list(APPEND libgit_SOURCES "${CMAKE_BINARY_DIR}/version-def.h") add_library(libgit ${libgit_SOURCES} ${compat_SOURCES}) -#libxdiff -parse_makefile_for_sources(libxdiff_SOURCES ${CMAKE_SOURCE_DIR}/Makefile "XDIFF_OBJS") - -list(TRANSFORM libxdiff_SOURCES PREPEND "${CMAKE_SOURCE_DIR}/") -add_library(xdiff STATIC ${libxdiff_SOURCES}) - -#reftable -parse_makefile_for_sources(reftable_SOURCES ${CMAKE_SOURCE_DIR}/Makefile "REFTABLE_OBJS") - -list(TRANSFORM reftable_SOURCES PREPEND "${CMAKE_SOURCE_DIR}/") -add_library(reftable STATIC ${reftable_SOURCES}) - if(WIN32) add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/git.rc COMMAND "${SH_EXE}" "${CMAKE_SOURCE_DIR}/GIT-VERSION-GEN" @@ -720,7 +708,7 @@ endif() #link all required libraries to common-main add_library(common-main OBJECT ${CMAKE_SOURCE_DIR}/common-main.c) -target_link_libraries(common-main libgit xdiff reftable ${ZLIB_LIBRARIES}) +target_link_libraries(common-main libgit ${ZLIB_LIBRARIES}) if(Intl_FOUND) target_link_libraries(common-main ${Intl_LIBRARIES}) endif() From af3919816f20c7c54e6d377945b2f6344b3588fe Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Mon, 17 Nov 2025 20:46:14 +0000 Subject: [PATCH 089/553] mingw: avoid the comma operator The pattern `return errno = ..., -1;` is observed several times in `compat/mingw.c`. It has served us well over the years, but now clang starts complaining: compat/mingw.c:723:24: error: possible misuse of comma operator here [-Werror,-Wcomma] 723 | return errno = ENOSYS, -1; | ^ See for example this failing workflow run: https://github.com/git-for-windows/git-sdk-arm64/actions/runs/15457893907/job/43513458823#step:8:201 Let's appease clang (and also reduce the use of the no longer common comma operator). Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- compat/mingw.c | 48 ++++++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 8538e3d1729d25..9b894d9639b642 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -491,8 +491,10 @@ static int mingw_open_append(wchar_t const *wfilename, int oflags, ...) DWORD create = (oflags & O_CREAT) ? OPEN_ALWAYS : OPEN_EXISTING; /* only these flags are supported */ - if ((oflags & ~O_CREAT) != (O_WRONLY | O_APPEND)) - return errno = ENOSYS, -1; + if ((oflags & ~O_CREAT) != (O_WRONLY | O_APPEND)) { + errno = ENOSYS; + return -1; + } /* * FILE_SHARE_WRITE is required to permit child processes @@ -2450,12 +2452,14 @@ static int start_timer_thread(void) timer_event = CreateEvent(NULL, FALSE, FALSE, NULL); if (timer_event) { timer_thread = (HANDLE) _beginthreadex(NULL, 0, ticktack, NULL, 0, NULL); - if (!timer_thread ) - return errno = ENOMEM, - error("cannot start timer thread"); - } else - return errno = ENOMEM, - error("cannot allocate resources for timer"); + if (!timer_thread ) { + errno = ENOMEM; + return error("cannot start timer thread"); + } + } else { + errno = ENOMEM; + return error("cannot allocate resources for timer"); + } return 0; } @@ -2488,13 +2492,15 @@ int setitimer(int type UNUSED, struct itimerval *in, struct itimerval *out) static const struct timeval zero; static int atexit_done; - if (out) - return errno = EINVAL, - error("setitimer param 3 != NULL not implemented"); + if (out) { + errno = EINVAL; + return error("setitimer param 3 != NULL not implemented"); + } if (!is_timeval_eq(&in->it_interval, &zero) && - !is_timeval_eq(&in->it_interval, &in->it_value)) - return errno = EINVAL, - error("setitimer: it_interval must be zero or eq it_value"); + !is_timeval_eq(&in->it_interval, &in->it_value)) { + errno = EINVAL; + return error("setitimer: it_interval must be zero or eq it_value"); + } if (timer_thread) stop_timer_thread(); @@ -2516,12 +2522,14 @@ int sigaction(int sig, struct sigaction *in, struct sigaction *out) { if (sig == SIGCHLD) return -1; - else if (sig != SIGALRM) - return errno = EINVAL, - error("sigaction only implemented for SIGALRM"); - if (out) - return errno = EINVAL, - error("sigaction: param 3 != NULL not implemented"); + else if (sig != SIGALRM) { + errno = EINVAL; + return error("sigaction only implemented for SIGALRM"); + } + if (out) { + errno = EINVAL; + return error("sigaction: param 3 != NULL not implemented"); + } timer_fn = in->sa_handler; return 0; From cd99203f86ea32e0b84d1ef18f5148b74972f617 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 18 Nov 2025 02:26:45 -0500 Subject: [PATCH 090/553] ci: bump actions/setup-go from 5 to 6 Bumps actions/setup-go from 5 to 6. This upgrade includes dependency updates that incorporate a fix for a critical vulnerability. [Originally opened at https://github.com/git-for-windows/git/pull/5811] - [Release notes](https://github.com/actions/setup-go/releases) - [Commits](https://github.com/actions/setup-go/compare/v5...v6) Originally-authored-by: dependabot[bot] Signed-off-by: Johannes Schindelin Signed-off-by: Jiang Xin Signed-off-by: Junio C Hamano --- .github/workflows/l10n.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/l10n.yml b/.github/workflows/l10n.yml index e2c3dbdcb50f0c..95e55134bdbed4 100644 --- a/.github/workflows/l10n.yml +++ b/.github/workflows/l10n.yml @@ -63,7 +63,7 @@ jobs: origin \ ${{ github.ref }} \ $args - - uses: actions/setup-go@v5 + - uses: actions/setup-go@v6 with: go-version: '>=1.16' cache: false From 65e8141f051401a101567b95860424d94f42ef39 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 18 Nov 2025 04:11:56 -0500 Subject: [PATCH 091/553] compat/mmap: mark unused argument in git_munmap() Our mmap compat code emulates mapping by using malloc/free. Our git_munmap() must take a "length" parameter to match the interface of munmap(), but we don't use it (it is up to the allocator to know how big the block is in free()). Let's mark it as UNUSED to avoid complaints from -Wunused-parameter. Otherwise you cannot build with "make DEVELOPER=1 NO_MMAP=1". Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- compat/mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compat/mmap.c b/compat/mmap.c index 2fe1c7732eea94..1a118711f7244a 100644 --- a/compat/mmap.c +++ b/compat/mmap.c @@ -38,7 +38,7 @@ void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t of return start; } -int git_munmap(void *start, size_t length) +int git_munmap(void *start, size_t length UNUSED) { free(start); return 0; From 4deb882e54152c31bef23f8b33ad38b7bc26d398 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 18 Nov 2025 04:12:06 -0500 Subject: [PATCH 092/553] pack-bitmap: handle name-hash lookups in incremental bitmaps If a bitmap has a name-hash cache, it is an array of 32-bit integers, one per entry in the bitmap, which we've mmap'd from the .bitmap file. We access it directly like this: if (bitmap_git->hashes) hash = get_be32(bitmap_git->hashes + index_pos); That works for both regular pack bitmaps and for non-incremental midx bitmaps. There is one bitmap_index with one "hashes" array, and index_pos is within its bounds (we do the bounds-checking when we load the bitmap). But for an incremental midx bitmap, we have a linked list of bitmap_index structs, and each one has only its own small slice of the name-hash array. If index_pos refers to an object that is not in the first bitmap_git of the chain, then we'll access memory outside of the bounds of its "hashes" array, and often outside of the mmap. Instead, we should walk through the list until we find the bitmap_index which serves our index_pos, and use its hash (after adjusting index_pos to make it relative to the slice we found). This is exactly what we do elsewhere for incremental midx lookups (like the pack_pos_to_midx() call a few lines above). But we can't use existing helpers like midx_for_object() here, because we're walking through the chain of bitmap_index structs (each of which refers to a midx), not the chain of incremental multi_pack_index structs themselves. The problem is triggered in the test suite, but we don't get a segfault because the out-of-bounds index is too small. The OS typically rounds our mmap up to the nearest page size, so we just end up accessing some extra zero'd memory. Nor do we catch it with ASan, since it doesn't seem to instrument mmaps at all. But if we build with NO_MMAP, then our maps are replaced with heap allocations, which ASan does check. And so: make NO_MMAP=1 SANITIZE=address cd t ./t5334-incremental-multi-pack-index.sh does show the problem (and this patch makes it go away). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- pack-bitmap.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/pack-bitmap.c b/pack-bitmap.c index ac6d62b980c5a8..9c9aa7b67c3197 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -212,6 +212,28 @@ static uint32_t bitmap_num_objects(struct bitmap_index *index) return index->pack->num_objects; } +static uint32_t bitmap_name_hash(struct bitmap_index *index, uint32_t pos) +{ + if (bitmap_is_midx(index)) { + while (index && pos < index->midx->num_objects_in_base) { + ASSERT(bitmap_is_midx(index)); + index = index->base; + } + + if (!index) + BUG("NULL base bitmap for object position: %"PRIu32, pos); + + pos -= index->midx->num_objects_in_base; + if (pos >= index->midx->num_objects) + BUG("out-of-bounds midx bitmap object at %"PRIu32, pos); + } + + if (!index->hashes) + return 0; + + return get_be32(index->hashes + pos); +} + static struct repository *bitmap_repo(struct bitmap_index *bitmap_git) { if (bitmap_is_midx(bitmap_git)) @@ -1726,8 +1748,7 @@ static void show_objects_for_type( pack = bitmap_git->pack; } - if (bitmap_git->hashes) - hash = get_be32(bitmap_git->hashes + index_pos); + hash = bitmap_name_hash(bitmap_git, index_pos); show_reach(&oid, object_type, 0, hash, pack, ofs, payload); } @@ -3083,8 +3104,8 @@ uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git, if (oe) { reposition[i] = oe_in_pack_pos(mapping, oe) + 1; - if (bitmap_git->hashes && !oe->hash) - oe->hash = get_be32(bitmap_git->hashes + index_pos); + if (!oe->hash) + oe->hash = bitmap_name_hash(bitmap_git, index_pos); } } From a9990f8ec0e750a68802f7d79d2aa2293c4811b4 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 18 Nov 2025 04:12:13 -0500 Subject: [PATCH 093/553] Makefile: turn on NO_MMAP when building with ASan Git often uses mmap() to access on-disk files. This leaves a blind spot in our SANITIZE=address builds, since ASan does not seem to handle mmap at all. Nor does the OS notice most out-of-bounds access, since it tends to round up to the nearest page size (so depending on how big the map is, you might have to overrun it by up to 4095 bytes to trigger a segfault). The previous commit demonstrates a memory bug that we missed. We could have made a new test where the out-of-bounds access was much larger, or where the mapped file ended closer to a page boundary. But the point of running the test suite with sanitizers is to catch these problems without having to construct specific tests. Let's enable NO_MMAP for our ASan builds by default, which should give us better coverage. This does increase the memory usage of Git, since we're copying from the filesystem into heap. But the repositories in the test suite tend to be small, so the overhead isn't really noticeable (and ASan already has quite a performance penalty). There are a few other known bugs that this patch will help flush out. However, they aren't directly triggered in the test suite (yet). So it's safe to turn this on now without breaking the test suite, which will help us add new tests to demonstrate those other bugs as we fix them. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- Makefile | 1 + meson.build | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 70d1543b6b8688..c2d327838a64cb 100644 --- a/Makefile +++ b/Makefile @@ -1518,6 +1518,7 @@ SANITIZE_LEAK = YesCompiledWithIt endif ifneq ($(filter address,$(SANITIZERS)),) NO_REGEX = NeededForASAN +NO_MMAP = NeededForASAN SANITIZE_ADDRESS = YesCompiledWithIt endif endif diff --git a/meson.build b/meson.build index 596f5ac7110ebf..269769b166a8c7 100644 --- a/meson.build +++ b/meson.build @@ -1369,12 +1369,18 @@ if host_machine.system() == 'windows' libgit_c_args += '-DUSE_WIN32_MMAP' else checkfuncs += { - 'mmap' : ['mmap.c'], # provided by compat/mingw.c. 'unsetenv' : ['unsetenv.c'], # provided by compat/mingw.c. 'getpagesize' : [], } + + if get_option('b_sanitize').contains('address') + libgit_c_args += '-DNO_MMAP' + libgit_sources += 'compat/mmap.c' + else + checkfuncs += { 'mmap': ['mmap.c'] } + endif endif foreach func, impls : checkfuncs From c4c9089584d0ed04978e8d0945b2ba2985e67bd3 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 18 Nov 2025 04:12:18 -0500 Subject: [PATCH 094/553] cache-tree: avoid strtol() on non-string buffer A cache-tree extension entry in the index looks like this: NUL SPACE NEWLINE where the "_nr" items are human-readable base-10 ASCII. We parse them with strtol(), even though we do not have a NUL-terminated string (we'd generally have an mmap() of the on-disk index file). For a well-formed entry, this is not a problem; strtol() will stop when it sees the newline. But there are two problems: 1. A corrupted entry could omit the newline, causing us to read further. You'd mostly get stopped by seeing non-digits in the oid field (and if it is likewise truncated, there will still be 20 or more bytes of the index checksum). So it's possible, though unlikely, to read off the end of the mmap'd buffer. Of course a malicious index file can fake the oid and the index checksum to all (ASCII) 0's. This is further complicated by the fact that mmap'd buffers tend to be zero-padded up to the page boundary. So to run off the end, the index size also has to be a multiple of the page size. This is also unlikely, though you can construct a malicious index file that matches this. The security implications aren't too interesting. The index file is a local file anyway (so you can't attack somebody by cloning, but only if you convince them to operate in a .git directory you made, at which point attacking .git/config is much easier). And it's just a read overflow via strtol(), which is unlikely to buy you much beyond a crash. 2. ASan has a strict_string_checks option, which tells it to make sure that options to string functions (like strtol) have some eventual NUL, without regard to what the function would actually do (like stopping at a newline here). This option sometimes has false positives, but it can point to sketchy areas (like this one) where the input we use doesn't exhibit a problem, but different input _could_ cause us to misbehave. Let's fix it by just parsing the values ourselves with a helper function that is careful not to go past the end of the buffer. There are a few behavior changes here that should not matter: - We do not consider overflow, as strtol() would. But nor did the original code. However, we don't trust the value we get from the on-disk file, and if it says to read 2^30 entries, we would notice that we do not have that many and bail before reading off the end of the buffer. - Our helper does not skip past extra leading whitespace as strtol() would, but according to gitformat-index(5) there should not be any. - The original quit parsing at a newline or a NUL byte, but now we insist on a newline (which is what the documentation says, and what Git has always produced). Since we are providing our own helper function, we can tweak the interface a bit to make our lives easier. The original code does not use strtol's "end" pointer to find the end of the parsed data, but rather uses a separate loop to advance our "buf" pointer to the trailing newline. We can instead provide a helper that advances "buf" as it parses, letting us read strictly left-to-right through the buffer. I didn't add a new test here. It's surprisingly difficult to construct an index of exactly the right size due to the way we pad entries. But it is easy to trigger the problem in existing tests when using ASan's strict string checking, coupled with a recent change to use NO_MMAP with ASan builds. So: make SANITIZE=address cd t ASAN_OPTIONS=strict_string_checks=1 ./t0090-cache-tree.sh triggers it reliably. Technically it is not deterministic because there is ~8% chance (it's 1-(255/256)^20, or ^32 for sha256) that the trailing checksum hash has a NUL byte in it. But we compute enough cache-trees in the course of that script that we are very likely to hit the problem in one of them. We can look at making strict_string_checks the default for ASan builds, but there are some other cases we'd want to fix first. Reported-by: correctmost Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- cache-tree.c | 50 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/cache-tree.c b/cache-tree.c index fa3858e2829aa8..2309911dfa0309 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -548,12 +548,41 @@ void cache_tree_write(struct strbuf *sb, struct cache_tree *root) trace2_region_leave("cache_tree", "write", the_repository); } +static int parse_int(const char **ptr, unsigned long *len_p, int *out) +{ + const char *s = *ptr; + unsigned long len = *len_p; + int ret = 0; + int sign = 1; + + while (len && *s == '-') { + sign *= -1; + s++; + len--; + } + + while (len) { + if (!isdigit(*s)) + break; + ret *= 10; + ret += *s - '0'; + s++; + len--; + } + + if (s == *ptr) + return -1; + + *ptr = s; + *len_p = len; + *out = sign * ret; + return 0; +} + static struct cache_tree *read_one(const char **buffer, unsigned long *size_p) { const char *buf = *buffer; unsigned long size = *size_p; - const char *cp; - char *ep; struct cache_tree *it; int i, subtree_nr; const unsigned rawsz = the_hash_algo->rawsz; @@ -569,19 +598,14 @@ static struct cache_tree *read_one(const char **buffer, unsigned long *size_p) buf++; size--; it = cache_tree(); - cp = buf; - it->entry_count = strtol(cp, &ep, 10); - if (cp == ep) + if (parse_int(&buf, &size, &it->entry_count) < 0) goto free_return; - cp = ep; - subtree_nr = strtol(cp, &ep, 10); - if (cp == ep) + if (!size || *buf != ' ') goto free_return; - while (size && *buf && *buf != '\n') { - size--; - buf++; - } - if (!size) + buf++; size--; + if (parse_int(&buf, &size, &subtree_nr) < 0) + goto free_return; + if (!size || *buf != '\n') goto free_return; buf++; size--; if (0 <= it->entry_count) { From 0b6ec075df2ac77a4792b8b1a7290a36b636012b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 18 Nov 2025 04:12:20 -0500 Subject: [PATCH 095/553] fsck: assert newline presence in fsck_ident() The fsck code purports to handle buffers that are not NUL-terminated, but fsck_ident() uses some string functions. This works OK in practice, as explained in 8e4309038f (fsck: do not assume NUL-termination of buffers, 2023-01-19). Before calling fsck_ident() we'll have called verify_headers(), which makes sure we have at least a trailing newline. And none of our string-like functions will walk past that newline. However, that makes this code at the top of fsck_ident() very confusing: *ident = strchrnul(*ident, '\n'); if (**ident == '\n') (*ident)++; We should always see that newline, or our memory safety assumptions have been violated! Further, using strchrnul() is weird, since the whole point is that if the newline is not there, we don't necessarily have a NUL at all, and might read off the end of the buffer. So let's have callers pass in the boundary of our buffer, which lets us safely find the newline with memchr(). And if it is not there, this is a BUG(), because it means our caller did not validate the input with verify_headers() as it was supposed to (and we are better off bailing rather than having memory-safety problems). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- fsck.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/fsck.c b/fsck.c index 8dc8472ceb3781..3d1027dc877424 100644 --- a/fsck.c +++ b/fsck.c @@ -859,16 +859,18 @@ static int verify_headers(const void *data, unsigned long size, FSCK_MSG_UNTERMINATED_HEADER, "unterminated header"); } -static int fsck_ident(const char **ident, +static int fsck_ident(const char **ident, const char *ident_end, const struct object_id *oid, enum object_type type, struct fsck_options *options) { const char *p = *ident; + const char *nl; char *end; - *ident = strchrnul(*ident, '\n'); - if (**ident == '\n') - (*ident)++; + nl = memchr(p, '\n', ident_end - p); + if (!nl) + BUG("verify_headers() should have made sure we have a newline"); + *ident = nl + 1; if (*p == '<') return report(options, oid, type, FSCK_MSG_MISSING_NAME_BEFORE_EMAIL, "invalid author/committer line - missing space before email"); @@ -957,7 +959,7 @@ static int fsck_commit(const struct object_id *oid, author_count = 0; while (buffer < buffer_end && skip_prefix(buffer, "author ", &buffer)) { author_count++; - err = fsck_ident(&buffer, oid, OBJ_COMMIT, options); + err = fsck_ident(&buffer, buffer_end, oid, OBJ_COMMIT, options); if (err) return err; } @@ -969,7 +971,7 @@ static int fsck_commit(const struct object_id *oid, return err; if (buffer >= buffer_end || !skip_prefix(buffer, "committer ", &buffer)) return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_COMMITTER, "invalid format - expected 'committer' line"); - err = fsck_ident(&buffer, oid, OBJ_COMMIT, options); + err = fsck_ident(&buffer, buffer_end, oid, OBJ_COMMIT, options); if (err) return err; if (memchr(buffer_begin, '\0', size)) { @@ -1064,7 +1066,7 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer, goto done; } else - ret = fsck_ident(&buffer, oid, OBJ_TAG, options); + ret = fsck_ident(&buffer, buffer_end, oid, OBJ_TAG, options); if (buffer < buffer_end && !starts_with(buffer, "\n")) { /* From 830424def4896dbf041d41dad873f5b86fdf9bfa Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 18 Nov 2025 04:12:23 -0500 Subject: [PATCH 096/553] fsck: avoid strcspn() in fsck_ident() We may be operating on a buffer that is not NUL-terminated, but we use strcspn() to parse it. This is OK in practice, as discussed in 8e4309038f (fsck: do not assume NUL-termination of buffers, 2023-01-19), because we know there is at least a trailing newline in our buffer, and we always pass "\n" to strcspn(). So we know it will stop before running off the end of the buffer. But this is a subtle point to hang our memory safety hat on. And it confuses ASan's strict_string_checks mode, even though it is technically a false positive (that mode complains that we have no NUL, which is true, but it does not know that we have verified the presence of the newline already). Let's instead open-code the loop. As a bonus, this makes the logic more obvious (to my mind, anyway). The current code skips forward with strcspn until it hits "<", ">", or "\n". But then it must check which it saw to decide if that was what we expected or not, duplicating some logic between what's in the strcspn() and what's in the domain logic. Instead, we can just check each character as we loop and act on it immediately. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- fsck.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/fsck.c b/fsck.c index 3d1027dc877424..3696f1b849ad9f 100644 --- a/fsck.c +++ b/fsck.c @@ -874,18 +874,30 @@ static int fsck_ident(const char **ident, const char *ident_end, if (*p == '<') return report(options, oid, type, FSCK_MSG_MISSING_NAME_BEFORE_EMAIL, "invalid author/committer line - missing space before email"); - p += strcspn(p, "<>\n"); - if (*p == '>') - return report(options, oid, type, FSCK_MSG_BAD_NAME, "invalid author/committer line - bad name"); - if (*p != '<') - return report(options, oid, type, FSCK_MSG_MISSING_EMAIL, "invalid author/committer line - missing email"); + for (;;) { + if (p >= ident_end || *p == '\n') + return report(options, oid, type, FSCK_MSG_MISSING_EMAIL, "invalid author/committer line - missing email"); + if (*p == '>') + return report(options, oid, type, FSCK_MSG_BAD_NAME, "invalid author/committer line - bad name"); + if (*p == '<') + break; /* end of name, beginning of email */ + + /* otherwise, skip past arbitrary name char */ + p++; + } if (p[-1] != ' ') return report(options, oid, type, FSCK_MSG_MISSING_SPACE_BEFORE_EMAIL, "invalid author/committer line - missing space before email"); - p++; - p += strcspn(p, "<>\n"); - if (*p != '>') - return report(options, oid, type, FSCK_MSG_BAD_EMAIL, "invalid author/committer line - bad email"); - p++; + p++; /* skip past '<' we found */ + for (;;) { + if (p >= ident_end || *p == '<' || *p == '\n') + return report(options, oid, type, FSCK_MSG_BAD_EMAIL, "invalid author/committer line - bad email"); + if (*p == '>') + break; /* end of email */ + + /* otherwise, skip past arbitrary email char */ + p++; + } + p++; /* skip past '>' we found */ if (*p != ' ') return report(options, oid, type, FSCK_MSG_MISSING_SPACE_BEFORE_DATE, "invalid author/committer line - missing space before date"); p++; From f05df7ffca492b37d604ad6beed788055eb56ebd Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 18 Nov 2025 04:12:25 -0500 Subject: [PATCH 097/553] fsck: remove redundant date timestamp check After calling "parse_timestamp(p, &end, 10)", we complain if "p == end", which would imply that we did not see any digits at all. But we know this cannot be the case, since we would have bailed already if we did not see any digits, courtesy of extra checks added by 8e4309038f (fsck: do not assume NUL-termination of buffers, 2023-01-19). Since then, checking "p == end" is redundant and we can drop it. This will make our lives a little easier as we refactor further. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- fsck.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fsck.c b/fsck.c index 3696f1b849ad9f..98b16a9e584060 100644 --- a/fsck.c +++ b/fsck.c @@ -919,7 +919,7 @@ static int fsck_ident(const char **ident, const char *ident_end, return report(options, oid, type, FSCK_MSG_ZERO_PADDED_DATE, "invalid author/committer line - zero-padded date"); if (date_overflows(parse_timestamp(p, &end, 10))) return report(options, oid, type, FSCK_MSG_BAD_DATE_OVERFLOW, "invalid author/committer line - date causes integer overflow"); - if ((end == p || *end != ' ')) + if (*end != ' ') return report(options, oid, type, FSCK_MSG_BAD_DATE, "invalid author/committer line - bad date"); p = end + 1; if ((*p != '+' && *p != '-') || From 5a993593b24df699f60841296795f9a6ca60d399 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 18 Nov 2025 04:12:28 -0500 Subject: [PATCH 098/553] fsck: avoid parse_timestamp() on buffer that isn't NUL-terminated In fsck_ident(), we parse the timestamp with parse_timestamp(), which is really an alias for strtoumax(). But since our buffer may not be NUL-terminated, this can trigger a complaint from ASan's strict_string_checks mode. This is a false positive, since we know that the buffer contains a trailing newline (which we checked earlier in the function), and that strtoumax() would stop there. But it is worth working around ASan's complaint. One is because that will let us turn on strict_string_checks by default, which has helped catch other real problems. And two is that the safety of the current code is very hard to reason about (it subtly depends on distant code which could change). One option here is to just parse the number left-to-right ourselves. But we care about the size of a timestamp_t and detecting overflow, since that's part of the point of these checks. And doing that correctly is tricky. So we'll instead just pull the digits into a separate, NUL-terminated buffer, and use that to call parse_timestamp(). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- fsck.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/fsck.c b/fsck.c index 98b16a9e584060..ec45f786d6ed74 100644 --- a/fsck.c +++ b/fsck.c @@ -859,13 +859,28 @@ static int verify_headers(const void *data, unsigned long size, FSCK_MSG_UNTERMINATED_HEADER, "unterminated header"); } +static timestamp_t parse_timestamp_from_buf(const char **start, const char *end) +{ + const char *p = *start; + char buf[24]; /* big enough for 2^64 */ + size_t i = 0; + + while (p < end && isdigit(*p)) { + if (i >= ARRAY_SIZE(buf) - 1) + return TIME_MAX; + buf[i++] = *p++; + } + buf[i] = '\0'; + *start = p; + return parse_timestamp(buf, NULL, 10); +} + static int fsck_ident(const char **ident, const char *ident_end, const struct object_id *oid, enum object_type type, struct fsck_options *options) { const char *p = *ident; const char *nl; - char *end; nl = memchr(p, '\n', ident_end - p); if (!nl) @@ -917,11 +932,11 @@ static int fsck_ident(const char **ident, const char *ident_end, "invalid author/committer line - bad date"); if (*p == '0' && p[1] != ' ') return report(options, oid, type, FSCK_MSG_ZERO_PADDED_DATE, "invalid author/committer line - zero-padded date"); - if (date_overflows(parse_timestamp(p, &end, 10))) + if (date_overflows(parse_timestamp_from_buf(&p, ident_end))) return report(options, oid, type, FSCK_MSG_BAD_DATE_OVERFLOW, "invalid author/committer line - date causes integer overflow"); - if (*end != ' ') + if (*p != ' ') return report(options, oid, type, FSCK_MSG_BAD_DATE, "invalid author/committer line - bad date"); - p = end + 1; + p++; if ((*p != '+' && *p != '-') || !isdigit(p[1]) || !isdigit(p[2]) || From a031b6181a1e1ee6768d19d6a03b031b6e9004e9 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 18 Nov 2025 04:12:30 -0500 Subject: [PATCH 099/553] t: enable ASan's strict_string_checks option ASan has an option to enable strict string checking, where any pointer passed to a function that expects a NUL-terminated string will be checked for that NUL termination. This can sometimes produce false positives. E.g., it is not wrong to pass a buffer with { '1', '2', '\n' } into strtoul(). Even though it is not NUL-terminated, it will stop at the newline. But in trying it out, it identified two problematic spots in our test suite (which have now been adjusted): 1. The strtol() parsing in cache-tree.c was a real potential problem, which would have been very hard to find otherwise (since it required constructing a very specific broken index file). 2. The use of string functions in fsck_ident() were false positives, because we knew that there was always a trailing newline which would stop the functions from reading off the end of the buffer. But the reasoning behind that is somewhat fragile, and silencing those complaints made the code easier to reason about. So even though this did not find any earth-shattering bugs, and even had a few false positives, I'm sufficiently convinced that its complaints are more helpful than hurtful. Let's turn it on by default (since the test suite now runs cleanly with it) and see if it ever turns up any other instances. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/test-lib.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/t/test-lib.sh b/t/test-lib.sh index 92d0db13d7429d..bbda7abb16c1c4 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -77,6 +77,7 @@ prepend_var GIT_SAN_OPTIONS : strip_path_prefix="$GIT_BUILD_DIR/" # want that one to complain to stderr). prepend_var ASAN_OPTIONS : $GIT_SAN_OPTIONS prepend_var ASAN_OPTIONS : detect_leaks=0 +prepend_var ASAN_OPTIONS : strict_string_checks=1 export ASAN_OPTIONS prepend_var LSAN_OPTIONS : $GIT_SAN_OPTIONS From e96105aa1741ebb61c17a59aee962058a3743e09 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 18 Nov 2025 04:32:43 -0500 Subject: [PATCH 100/553] unit-test: ignore --no-chain-lint In the same spirit as 9faf3963b6 (t: introduce compatibility options to clar-based tests, 2024-12-13), we should ignore --no-chain-lint passed to our clar tests, since it may appear in GIT_TEST_OPTS to be used with other tests. This is particularly important on Windows CI, where --no-chain-lint is added to the test options by default, and the meson build will pass all options to the unit tests. The only reason our meson Windows CI job does not run into this currently is that it is not respecting GIT_TEST_OPTS at all! So ignoring this option is a prerequisite to fixing that situation. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/unit-tests/unit-test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/t/unit-tests/unit-test.c b/t/unit-tests/unit-test.c index 5af645048adf4e..752fb38fb324a1 100644 --- a/t/unit-tests/unit-test.c +++ b/t/unit-tests/unit-test.c @@ -29,6 +29,7 @@ int cmd_main(int argc, const char **argv) OPT_NOOP_NOARG('d', "debug"), OPT_NOOP_NOARG(0, "github-workflow-markup"), OPT_NOOP_NOARG(0, "no-bin-wrappers"), + OPT_NOOP_ARG(0, "no-chain-lint"), OPT_NOOP_ARG(0, "root"), OPT_NOOP_ARG(0, "stress"), OPT_NOOP_NOARG(0, "tee"), From 17bd1108eac98d8977a24233a498143ffd577c31 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 18 Nov 2025 04:35:19 -0500 Subject: [PATCH 101/553] ci(windows-meson-test): handle options and output like other test jobs The GitHub windows-meson-test jobs directly run "meson test" with the --slice option. This means they skip all of the ci/lib.sh infrastructure, and in particular: 1. They do not actually set any GIT_TEST_OPTS like --verbose-log or -x. 2. They do not do the usual handle_failed_tests() magic to print test failures or tar up failed directories. As a result, you get almost no feedback at all when a test fails in this job, making debugging rather tricky. Let's try to make this behave more like the other CI jobs. Because we're on Windows, we can't just use the normal run-build-and-tests.sh script. Our build runs as a separate job (like the non-meson Windows job), and then we parallelize the tests across several job slices. So we need something like the run-test-slice.sh script that the "windows-test" job uses. In theory we could just swap out the "make" invocation there for "meson". But it doesn't quite work, because "make" knows how to pull GIT_TEST_OPTS out of GIT-BUILD-OPTIONS automatically. But for meson, we have to extract them into the --test-args option ourselves. I tried making the logic in run-test-slice.sh conditional, but there ended up being hardly any common code at all (and there are some tricky ordering constraints). So I added up with a new meson-specific test-slice runner. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- .github/workflows/main.yml | 12 +++++++++++- ci/run-test-slice-meson.sh | 13 +++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100755 ci/run-test-slice-meson.sh diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index aa6bce673b4e99..e8c824406f17f3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -298,7 +298,17 @@ jobs: path: build - name: Test shell: pwsh - run: meson test -C build --no-rebuild --print-errorlogs --slice "$(1+${{ matrix.nr }})/10" + run: ci/run-test-slice-meson.sh build ${{matrix.nr}} 10 + - name: print test failures + if: failure() && env.FAILED_TEST_ARTIFACTS != '' + shell: bash + run: ci/print-test-failures.sh + - name: Upload failed tests' directories + if: failure() && env.FAILED_TEST_ARTIFACTS != '' + uses: actions/upload-artifact@v4 + with: + name: failed-tests-windows-meson-${{ matrix.nr }} + path: ${{env.FAILED_TEST_ARTIFACTS}} regular: name: ${{matrix.vector.jobname}} (${{matrix.vector.pool}}) diff --git a/ci/run-test-slice-meson.sh b/ci/run-test-slice-meson.sh new file mode 100755 index 00000000000000..961c94fba0b2ee --- /dev/null +++ b/ci/run-test-slice-meson.sh @@ -0,0 +1,13 @@ +#!/bin/sh + +# We must load the build options so we know where to find +# things like TEST_OUTPUT_DIRECTORY. This has to come before +# loading lib.sh, though, because it may clobber some CI lib +# variables like our custom GIT_TEST_OPTS. +. "$1"/GIT-BUILD-OPTIONS +. ${0%/*}/lib.sh + +group "Run tests" \ + meson test -C "$1" --no-rebuild --print-errorlogs \ + --test-args="$GIT_TEST_OPTS" --slice "$((1+$2))/$3" || +handle_failed_tests From 14b561e7685ee91d6a3d39684f9089c902641083 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 18 Nov 2025 07:21:24 -0500 Subject: [PATCH 102/553] test-mktemp: plug memory and descriptor leaks We test xmkstemp() in our helper by just calling: xmkstemp(xstrdup(argv[1])); This leaks both the copied string as well as the descriptor returned by the function. In practice this isn't a big deal, since we immediately exit the program, but: 1. LSan will complain about the memory leak. The only reason we did not notice this in our leak-checking builds is that both of the callers in the test suite (both in t0070) pass a broken template (and expect failure). So the function calls die() before we can actually leak. But it's an accident waiting to happen if anybody adds a call which succeeds. 2. Coverity complains about the descriptor leak. There's a long list of uninteresting or false positives in Coverity's results, but since we're here we might as well fix it, too. I didn't bother adding a new test that triggers the leak. It's not even in real production code, but just in the test-helper itself. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/helper/test-mktemp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/t/helper/test-mktemp.c b/t/helper/test-mktemp.c index 22906889402933..da195640a9dcc8 100644 --- a/t/helper/test-mktemp.c +++ b/t/helper/test-mktemp.c @@ -6,10 +6,16 @@ int cmd__mktemp(int argc, const char **argv) { + char *template; + int fd; + if (argc != 2) usage("Expected 1 parameter defining the temporary file template"); + template = xstrdup(argv[1]); - xmkstemp(xstrdup(argv[1])); + fd = xmkstemp(template); + close(fd); + free(template); return 0; } From a6238ee16371247517e39da36782614a229184ff Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Tue, 18 Nov 2025 16:07:32 +0000 Subject: [PATCH 103/553] worktree list: fix column spacing The output of "git worktree list" displays a table containing the worktree path, HEAD OID and branch name for each worktree. The code aligns the columns by measuring the visual width of the worktree path when it is printed. Unfortunately it fails to use the visual width when calculating the width of the column so, if any of the paths contain a multibyte character, we can end up with excess padding between columns. The simplest fix would be to replace strlen() with utf8_strwidth() in measure_widths(). However that leaves us measuring the visual width twice and the byte length once. By caching the visual width and printing the padding separately to the worktree path, we only need to calculate the visual width once and do not need the byte length at all. The visual widths are stored in an arrays of structs rather than an array of ints as the next commit will add more struct members. Even if there are no multibyte characters in any of the paths we still print an extra space between the path and the object id as the field width is calculated as one plus the length of the path and we print an explicit space as well. This is fixed by not printing the extra space. The tests are updated to include multibyte characters in one of the worktree paths and to check the spacing of the columns. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- builtin/worktree.c | 35 +++++++++++++++++++++++------------ t/t2402-worktree-list.sh | 22 ++++++++++------------ 2 files changed, 33 insertions(+), 24 deletions(-) diff --git a/builtin/worktree.c b/builtin/worktree.c index 812774a5ca992c..0643a22ee58b89 100644 --- a/builtin/worktree.c +++ b/builtin/worktree.c @@ -979,14 +979,17 @@ static void show_worktree_porcelain(struct worktree *wt, int line_terminator) fputc(line_terminator, stdout); } -static void show_worktree(struct worktree *wt, int path_maxlen, int abbrev_len) +struct worktree_display { + int width; +}; + +static void show_worktree(struct worktree *wt, struct worktree_display *display, + int path_maxwidth, int abbrev_len) { struct strbuf sb = STRBUF_INIT; - int cur_path_len = strlen(wt->path); - int path_adj = cur_path_len - utf8_strwidth(wt->path); const char *reason; - strbuf_addf(&sb, "%-*s ", 1 + path_maxlen + path_adj, wt->path); + strbuf_addf(&sb, "%s%*s", wt->path, 1 + path_maxwidth - display->width, ""); if (wt->is_bare) strbuf_addstr(&sb, "(bare)"); else { @@ -1020,20 +1023,24 @@ static void show_worktree(struct worktree *wt, int path_maxlen, int abbrev_len) strbuf_release(&sb); } -static void measure_widths(struct worktree **wt, int *abbrev, int *maxlen) +static void measure_widths(struct worktree **wt, int *abbrev, + struct worktree_display **d, int *maxwidth) { - int i; + int i, display_alloc = 0; + struct worktree_display *display = NULL; for (i = 0; wt[i]; i++) { int sha1_len; - int path_len = strlen(wt[i]->path); + ALLOC_GROW(display, i + 1, display_alloc); + display[i].width = utf8_strwidth(wt[i]->path); - if (path_len > *maxlen) - *maxlen = path_len; + if (display[i].width > *maxwidth) + *maxwidth = display[i].width; sha1_len = strlen(repo_find_unique_abbrev(the_repository, &wt[i]->head_oid, *abbrev)); if (sha1_len > *abbrev) *abbrev = sha1_len; } + *d = display; } static int pathcmp(const void *a_, const void *b_) @@ -1079,21 +1086,25 @@ static int list(int ac, const char **av, const char *prefix, die(_("the option '%s' requires '%s'"), "-z", "--porcelain"); else { struct worktree **worktrees = get_worktrees(); - int path_maxlen = 0, abbrev = DEFAULT_ABBREV, i; + int path_maxwidth = 0, abbrev = DEFAULT_ABBREV, i; + struct worktree_display *display = NULL; /* sort worktrees by path but keep main worktree at top */ pathsort(worktrees + 1); if (!porcelain) - measure_widths(worktrees, &abbrev, &path_maxlen); + measure_widths(worktrees, &abbrev, + &display, &path_maxwidth); for (i = 0; worktrees[i]; i++) { if (porcelain) show_worktree_porcelain(worktrees[i], line_terminator); else - show_worktree(worktrees[i], path_maxlen, abbrev); + show_worktree(worktrees[i], + &display[i], path_maxwidth, abbrev); } + free(display); free_worktrees(worktrees); } return 0; diff --git a/t/t2402-worktree-list.sh b/t/t2402-worktree-list.sh index 8ef1cad7f29d7d..a494df6d612668 100755 --- a/t/t2402-worktree-list.sh +++ b/t/t2402-worktree-list.sh @@ -30,22 +30,20 @@ test_expect_success 'rev-parse --git-path objects linked worktree' ' ' test_expect_success '"list" all worktrees from main' ' - echo "$(git rev-parse --show-toplevel) $(git rev-parse --short HEAD) [$(git symbolic-ref --short HEAD)]" >expect && - test_when_finished "rm -rf here out actual expect && git worktree prune" && - git worktree add --detach here main && - echo "$(git -C here rev-parse --show-toplevel) $(git rev-parse --short HEAD) (detached HEAD)" >>expect && - git worktree list >out && - sed "s/ */ /g" actual && + echo "$(git rev-parse --show-toplevel) $(git rev-parse --short HEAD) [$(git symbolic-ref --short HEAD)]" >expect && + test_when_finished "rm -rf áááá out actual expect && git worktree prune" && + git worktree add --detach áááá main && + echo "$(git -C áááá rev-parse --show-toplevel) $(git rev-parse --short HEAD) (detached HEAD)" >>expect && + git worktree list >actual && test_cmp expect actual ' test_expect_success '"list" all worktrees from linked' ' - echo "$(git rev-parse --show-toplevel) $(git rev-parse --short HEAD) [$(git symbolic-ref --short HEAD)]" >expect && - test_when_finished "rm -rf here out actual expect && git worktree prune" && - git worktree add --detach here main && - echo "$(git -C here rev-parse --show-toplevel) $(git rev-parse --short HEAD) (detached HEAD)" >>expect && - git -C here worktree list >out && - sed "s/ */ /g" actual && + echo "$(git rev-parse --show-toplevel) $(git rev-parse --short HEAD) [$(git symbolic-ref --short HEAD)]" >expect && + test_when_finished "rm -rf áááá out actual expect && git worktree prune" && + git worktree add --detach áááá main && + echo "$(git -C áááá rev-parse --show-toplevel) $(git rev-parse --short HEAD) (detached HEAD)" >>expect && + git -C áááá worktree list >actual && test_cmp expect actual ' From 08dfa5983572645ae7cc51b49cadfdf216ecfec6 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Tue, 18 Nov 2025 16:07:33 +0000 Subject: [PATCH 104/553] worktree list: quote paths If a worktree path contains newlines or other control characters it messes up the output of "git worktree list". Fix this by using quote_path() to display the worktree path. The output of "git worktree list" is designed for human consumption, scripts should be using the "--porcelain" option so this change should not break them. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- builtin/worktree.c | 10 ++++++++-- t/t2402-worktree-list.sh | 15 ++++++++++++++- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/builtin/worktree.c b/builtin/worktree.c index 0643a22ee58b89..303cc3b2d64b64 100644 --- a/builtin/worktree.c +++ b/builtin/worktree.c @@ -980,6 +980,7 @@ static void show_worktree_porcelain(struct worktree *wt, int line_terminator) } struct worktree_display { + char *path; int width; }; @@ -989,7 +990,7 @@ static void show_worktree(struct worktree *wt, struct worktree_display *display, struct strbuf sb = STRBUF_INIT; const char *reason; - strbuf_addf(&sb, "%s%*s", wt->path, 1 + path_maxwidth - display->width, ""); + strbuf_addf(&sb, "%s%*s", display->path, 1 + path_maxwidth - display->width, ""); if (wt->is_bare) strbuf_addstr(&sb, "(bare)"); else { @@ -1028,11 +1029,14 @@ static void measure_widths(struct worktree **wt, int *abbrev, { int i, display_alloc = 0; struct worktree_display *display = NULL; + struct strbuf buf = STRBUF_INIT; for (i = 0; wt[i]; i++) { int sha1_len; ALLOC_GROW(display, i + 1, display_alloc); - display[i].width = utf8_strwidth(wt[i]->path); + quote_path(wt[i]->path, NULL, &buf, 0); + display[i].width = utf8_strwidth(buf.buf); + display[i].path = strbuf_detach(&buf, NULL); if (display[i].width > *maxwidth) *maxwidth = display[i].width; @@ -1104,6 +1108,8 @@ static int list(int ac, const char **av, const char *prefix, show_worktree(worktrees[i], &display[i], path_maxwidth, abbrev); } + for (i = 0; display && worktrees[i]; i++) + free(display[i].path); free(display); free_worktrees(worktrees); } diff --git a/t/t2402-worktree-list.sh b/t/t2402-worktree-list.sh index a494df6d612668..e0c6abd2f58e20 100755 --- a/t/t2402-worktree-list.sh +++ b/t/t2402-worktree-list.sh @@ -29,7 +29,8 @@ test_expect_success 'rev-parse --git-path objects linked worktree' ' test_cmp expect actual ' -test_expect_success '"list" all worktrees from main' ' +test_expect_success '"list" all worktrees from main core.quotepath=false' ' + test_config core.quotepath false && echo "$(git rev-parse --show-toplevel) $(git rev-parse --short HEAD) [$(git symbolic-ref --short HEAD)]" >expect && test_when_finished "rm -rf áááá out actual expect && git worktree prune" && git worktree add --detach áááá main && @@ -38,7 +39,19 @@ test_expect_success '"list" all worktrees from main' ' test_cmp expect actual ' +test_expect_success '"list" all worktrees from main core.quotepath=true' ' + test_config core.quotepath true && + echo "$(git rev-parse --show-toplevel) $(git rev-parse --short HEAD) [$(git symbolic-ref --short HEAD)]" >expect && + test_when_finished "rm -rf á out actual expect && git worktree prune" && + git worktree add --detach á main && + echo "\"$(git -C á rev-parse --show-toplevel)\" $(git rev-parse --short HEAD) (detached HEAD)" | + sed s/á/\\\\303\\\\241/g >>expect && + git worktree list >actual && + test_cmp expect actual +' + test_expect_success '"list" all worktrees from linked' ' + test_config core.quotepath false && echo "$(git rev-parse --show-toplevel) $(git rev-parse --short HEAD) [$(git symbolic-ref --short HEAD)]" >expect && test_when_finished "rm -rf áááá out actual expect && git worktree prune" && git worktree add --detach áááá main && From fd7d79d068dd14a4d7a4a93f7bfd31cf24020aec Mon Sep 17 00:00:00 2001 From: Lucas Seiki Oshiro Date: Tue, 18 Nov 2025 17:37:03 -0300 Subject: [PATCH 105/553] repo: factor out field printing to dedicated function Move the field printing in git-repo-info to a new function called `print_field`, allowing it to be called by functions other than `print_fields`. Also change its use of quote_c_style() helper to output directly to the standard output stream, instead of taking a result in a strbuf and then printing it outselves. Signed-off-by: Lucas Seiki Oshiro Signed-off-by: Junio C Hamano --- builtin/repo.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/builtin/repo.c b/builtin/repo.c index 9d4749f79befa8..f9fb4184940e2e 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -85,13 +85,29 @@ static get_value_fn *get_value_fn_for_key(const char *key) return found ? found->get_value : NULL; } +static void print_field(enum output_format format, const char *key, + const char *value) +{ + switch (format) { + case FORMAT_KEYVALUE: + printf("%s=", key); + quote_c_style(value, NULL, stdout, 0); + putchar('\n'); + break; + case FORMAT_NUL_TERMINATED: + printf("%s\n%s%c", key, value, '\0'); + break; + default: + BUG("not a valid output format: %d", format); + } +} + static int print_fields(int argc, const char **argv, struct repository *repo, enum output_format format) { int ret = 0; struct strbuf valbuf = STRBUF_INIT; - struct strbuf quotbuf = STRBUF_INIT; for (int i = 0; i < argc; i++) { get_value_fn *get_value; @@ -105,25 +121,11 @@ static int print_fields(int argc, const char **argv, } strbuf_reset(&valbuf); - strbuf_reset("buf); - get_value(repo, &valbuf); - - switch (format) { - case FORMAT_KEYVALUE: - quote_c_style(valbuf.buf, "buf, NULL, 0); - printf("%s=%s\n", key, quotbuf.buf); - break; - case FORMAT_NUL_TERMINATED: - printf("%s\n%s%c", key, valbuf.buf, '\0'); - break; - default: - BUG("not a valid output format: %d", format); - } + print_field(format, key, valbuf.buf); } strbuf_release(&valbuf); - strbuf_release("buf); return ret; } From 155caac7d1fa981b21192c598cf9bbffdb5aea12 Mon Sep 17 00:00:00 2001 From: Lucas Seiki Oshiro Date: Tue, 18 Nov 2025 17:37:04 -0300 Subject: [PATCH 106/553] repo: add --all to git-repo-info Add a new flag `--all` to git-repo-info for requesting values for all the available keys. By using this flag, the user can retrieve all the values instead of searching what are the desired keys for what they wants. Helped-by: Karthik Nayak Helped-by: Patrick Steinhardt Signed-off-by: Lucas Seiki Oshiro Signed-off-by: Junio C Hamano --- Documentation/git-repo.adoc | 6 +++--- builtin/repo.c | 29 +++++++++++++++++++++++++++-- t/t1900-repo.sh | 21 +++++++++++++++++++++ 3 files changed, 51 insertions(+), 5 deletions(-) diff --git a/Documentation/git-repo.adoc b/Documentation/git-repo.adoc index ce43cb19c8b03c..70f0a6d2e47291 100644 --- a/Documentation/git-repo.adoc +++ b/Documentation/git-repo.adoc @@ -8,7 +8,7 @@ git-repo - Retrieve information about the repository SYNOPSIS -------- [synopsis] -git repo info [--format=(keyvalue|nul)] [-z] [...] +git repo info [--format=(keyvalue|nul)] [-z] [--all | ...] git repo structure [--format=(table|keyvalue|nul)] DESCRIPTION @@ -19,13 +19,13 @@ THIS COMMAND IS EXPERIMENTAL. THE BEHAVIOR MAY CHANGE. COMMANDS -------- -`info [--format=(keyvalue|nul)] [-z] [...]`:: +`info [--format=(keyvalue|nul)] [-z] [--all | ...]`:: Retrieve metadata-related information about the current repository. Only the requested data will be returned based on their keys (see "INFO KEYS" section below). + The values are returned in the same order in which their respective keys were -requested. +requested. The `--all` flag requests the values for all the available keys. + The output format can be chosen through the flag `--format`. Two formats are supported: diff --git a/builtin/repo.c b/builtin/repo.c index f9fb4184940e2e..e30e2416d4f59d 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -15,7 +15,7 @@ #include "utf8.h" static const char *const repo_usage[] = { - "git repo info [--format=(keyvalue|nul)] [-z] [...]", + "git repo info [--format=(keyvalue|nul)] [-z] [--all | ...]", "git repo structure [--format=(table|keyvalue|nul)]", NULL }; @@ -129,6 +129,23 @@ static int print_fields(int argc, const char **argv, return ret; } +static int print_all_fields(struct repository *repo, + enum output_format format) +{ + struct strbuf valbuf = STRBUF_INIT; + + for (size_t i = 0; i < ARRAY_SIZE(repo_info_fields); i++) { + const struct field *field = &repo_info_fields[i]; + + strbuf_reset(&valbuf); + field->get_value(repo, &valbuf); + print_field(format, field->key, valbuf.buf); + } + + strbuf_release(&valbuf); + return 0; +} + static int parse_format_cb(const struct option *opt, const char *arg, int unset UNUSED) { @@ -152,6 +169,7 @@ static int cmd_repo_info(int argc, const char **argv, const char *prefix, struct repository *repo) { enum output_format format = FORMAT_KEYVALUE; + int all_keys = 0; struct option options[] = { OPT_CALLBACK_F(0, "format", &format, N_("format"), N_("output format"), @@ -160,6 +178,7 @@ static int cmd_repo_info(int argc, const char **argv, const char *prefix, N_("synonym for --format=nul"), PARSE_OPT_NONEG | PARSE_OPT_NOARG, parse_format_cb), + OPT_BOOL(0, "all", &all_keys, N_("print all keys/values")), OPT_END() }; @@ -167,7 +186,13 @@ static int cmd_repo_info(int argc, const char **argv, const char *prefix, if (format != FORMAT_KEYVALUE && format != FORMAT_NUL_TERMINATED) die(_("unsupported output format")); - return print_fields(argc, argv, repo, format); + if (all_keys && argc) + die(_("--all and cannot be used together")); + + if (all_keys) + return print_all_fields(repo, format); + else + return print_fields(argc, argv, repo, format); } struct ref_stats { diff --git a/t/t1900-repo.sh b/t/t1900-repo.sh index 2beba67889af25..51d55f11a5ed66 100755 --- a/t/t1900-repo.sh +++ b/t/t1900-repo.sh @@ -4,6 +4,15 @@ test_description='test git repo-info' . ./test-lib.sh +# git-repo-info keys. It must contain the same keys listed in the const +# repo_info_fields, in lexicographical order. +REPO_INFO_KEYS=' + layout.bare + layout.shallow + object.format + references.format +' + # Test whether a key-value pair is correctly returned # # Usage: test_repo_info