From 818b64e2e28b8a858d43621787d838bffe56221e Mon Sep 17 00:00:00 2001 From: Alan Braithwaite Date: Fri, 27 Feb 2026 17:23:32 -0800 Subject: [PATCH] fetch, clone: add fetch.blobSizeLimit config External tools like git-lfs and git-fat use the filter clean/smudge mechanism to manage large binary objects, but this requires pointer files, a separate storage backend, and careful coordination. Git's partial clone infrastructure provides a more native approach: large blobs can be excluded at the protocol level during fetch and lazily retrieved on demand. However, enabling this requires passing `--filter=blob:limit=<n>` on every clone, which is not discoverable and cannot be set as a global default. Add a new `fetch.blobSizeLimit` configuration option that enables size-based partial clone behavior globally. When set, both `git clone` and `git fetch` automatically apply a `blob:limit=<n>` filter. Blobs larger than the threshold that are not needed for the current worktree are excluded from the transfer and lazily fetched on demand when needed (e.g., during checkout, diff, or merge). This makes it easy to work with repositories that have accumulated large binary files in their history, without downloading all of them upfront. The precedence order is: 1. Explicit `--filter=` on the command line (highest) 2. Existing `remote.<name>.partialclonefilter` 3. `fetch.blobSizeLimit` (new, lowest) Once a clone or fetch applies this setting, the remote is registered as a promisor remote with the corresponding filter spec, so subsequent fetches inherit it automatically. If the server does not support object filtering, the setting is silently ignored. 
Signed-off-by: Alan Braithwaite --- Documentation/config/fetch.adoc | 19 +++++++++++ builtin/clone.c | 13 +++++++ builtin/fetch.c | 45 +++++++++++++++++++------ t/t5616-partial-clone.sh | 60 +++++++++++++++++++++++++++++++++ 4 files changed, 127 insertions(+), 10 deletions(-) diff --git a/Documentation/config/fetch.adoc b/Documentation/config/fetch.adoc index cd40db0cad1c36..4165354dd90bf7 100644 --- a/Documentation/config/fetch.adoc +++ b/Documentation/config/fetch.adoc @@ -103,6 +103,25 @@ config setting. file helps performance of many Git commands, including `git merge-base`, `git push -f`, and `git log --graph`. Defaults to `false`. +`fetch.blobSizeLimit`:: + When set to a size value (e.g., `1m`, `100k`, `1g`), both + linkgit:git-clone[1] and linkgit:git-fetch[1] will automatically + use `--filter=blob:limit=<n>` to enable partial clone + behavior. Blobs larger than this threshold are excluded from the + initial transfer and lazily fetched on demand when needed (e.g., + during checkout). ++ +This provides a convenient way to enable size-based partial clones +globally without passing `--filter` on every command. Once a clone or +fetch applies this setting, the remote is registered as a promisor +remote with the corresponding filter, so subsequent fetches inherit +the filter automatically. ++ +An explicit `--filter` option on the command line takes precedence over +this config. An existing `remote.<name>.partialclonefilter` also takes +precedence. If the server does not support object filtering, the +setting is silently ignored. + `fetch.bundleURI`:: This value stores a URI for downloading Git object data from a bundle URI before performing an incremental fetch from the origin Git server. 
diff --git a/builtin/clone.c b/builtin/clone.c index 45d8fa0eed78c4..1e3261b6235c3f 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -78,6 +78,7 @@ static struct string_list option_optional_reference = STRING_LIST_INIT_NODUP; static int max_jobs = -1; static struct string_list option_recurse_submodules = STRING_LIST_INIT_NODUP; static int config_filter_submodules = -1; /* unspecified */ +static char *config_blob_size_limit; static int option_remote_submodules; static int recurse_submodules_cb(const struct option *opt, @@ -753,6 +754,10 @@ static int git_clone_config(const char *k, const char *v, config_reject_shallow = git_config_bool(k, v); if (!strcmp(k, "clone.filtersubmodules")) config_filter_submodules = git_config_bool(k, v); + if (!strcmp(k, "fetch.blobsizelimit")) { + free(config_blob_size_limit); + git_config_string(&config_blob_size_limit, k, v); + } return git_default_config(k, v, ctx, cb); } @@ -1010,6 +1015,13 @@ int cmd_clone(int argc, argc = parse_options(argc, argv, prefix, builtin_clone_options, builtin_clone_usage, 0); + if (!filter_options.choice && config_blob_size_limit) { + struct strbuf buf = STRBUF_INIT; + strbuf_addf(&buf, "blob:limit=%s", config_blob_size_limit); + parse_list_objects_filter(&filter_options, buf.buf); + strbuf_release(&buf); + } + if (argc > 2) usage_msg_opt(_("Too many arguments."), builtin_clone_usage, builtin_clone_options); @@ -1634,6 +1646,7 @@ int cmd_clone(int argc, ref_storage_format); list_objects_filter_release(&filter_options); + free(config_blob_size_limit); string_list_clear(&option_not, 0); string_list_clear(&option_config, 0); diff --git a/builtin/fetch.c b/builtin/fetch.c index 573c2952415bc2..ff898cb6f4e29e 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -109,6 +109,7 @@ struct fetch_config { int recurse_submodules; int parallel; int submodule_fetch_jobs; + char *blob_size_limit; }; static int git_fetch_config(const char *k, const char *v, @@ -160,6 +161,9 @@ static int git_fetch_config(const 
char *k, const char *v, return 0; } + if (!strcmp(k, "fetch.blobsizelimit")) + return git_config_string(&fetch_config->blob_size_limit, k, v); + if (!strcmp(k, "fetch.output")) { if (!v) return config_error_nonbool(k); @@ -2342,7 +2346,8 @@ static int fetch_multiple(struct string_list *list, int max_children, * or inherit the default filter-spec from the config. */ static inline void fetch_one_setup_partial(struct remote *remote, - struct list_objects_filter_options *filter_options) + struct list_objects_filter_options *filter_options, + const struct fetch_config *config) { /* * Explicit --no-filter argument overrides everything, regardless @@ -2352,10 +2357,12 @@ static inline void fetch_one_setup_partial(struct remote *remote, return; /* - * If no prior partial clone/fetch and the current fetch DID NOT - * request a partial-fetch, do a normal fetch. + * If no prior partial clone/fetch, the current fetch did not + * request a partial-fetch, and no global blob size limit is + * configured, do a normal fetch. */ - if (!repo_has_promisor_remote(the_repository) && !filter_options->choice) + if (!repo_has_promisor_remote(the_repository) && + !filter_options->choice && !config->blob_size_limit) return; /* @@ -2372,11 +2379,27 @@ static inline void fetch_one_setup_partial(struct remote *remote, /* * Do a partial-fetch from the promisor remote using either the * explicitly given filter-spec or inherit the filter-spec from - * the config. + * the per-remote config. + */ + if (repo_has_promisor_remote(the_repository)) { + partial_clone_get_default_filter_spec(filter_options, + remote->name); + if (filter_options->choice) + return; + } + + /* + * Fall back to the global fetch.blobSizeLimit config. This + * enables partial clone behavior without requiring --filter + * on the command line or a pre-existing promisor remote. 
*/ - if (!filter_options->choice) - partial_clone_get_default_filter_spec(filter_options, remote->name); - return; + if (!filter_options->choice && config->blob_size_limit) { + struct strbuf buf = STRBUF_INIT; + strbuf_addf(&buf, "blob:limit=%s", config->blob_size_limit); + parse_list_objects_filter(filter_options, buf.buf); + strbuf_release(&buf); + partial_clone_register(remote->name, filter_options); + } } static int fetch_one(struct remote *remote, int argc, const char **argv, @@ -2762,9 +2785,10 @@ int cmd_fetch(int argc, oidset_clear(&acked_commits); trace2_region_leave("fetch", "negotiate-only", the_repository); } else if (remote) { - if (filter_options.choice || repo_has_promisor_remote(the_repository)) { + if (filter_options.choice || repo_has_promisor_remote(the_repository) || + config.blob_size_limit) { trace2_region_enter("fetch", "setup-partial", the_repository); - fetch_one_setup_partial(remote, &filter_options); + fetch_one_setup_partial(remote, &filter_options, &config); trace2_region_leave("fetch", "setup-partial", the_repository); } trace2_region_enter("fetch", "fetch-one", the_repository); @@ -2876,5 +2900,6 @@ int cmd_fetch(int argc, cleanup: string_list_clear(&list, 0); list_objects_filter_release(&filter_options); + free(config.blob_size_limit); return result; } diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 1e354e057fa12c..44b41f315f855d 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -722,6 +722,66 @@ test_expect_success 'after fetching descendants of non-promisor commits, gc work git -C partial gc --prune=now ' +# Test fetch.blobSizeLimit config + +test_expect_success 'setup for fetch.blobSizeLimit tests' ' + git init blob-limit-src && + echo "small" >blob-limit-src/small.txt && + dd if=/dev/zero of=blob-limit-src/large.bin bs=1024 count=100 2>/dev/null && + git -C blob-limit-src add . 
&& + git -C blob-limit-src commit -m "initial" && + + git clone --bare "file://$(pwd)/blob-limit-src" blob-limit-srv.bare && + git -C blob-limit-srv.bare config --local uploadpack.allowfilter 1 && + git -C blob-limit-srv.bare config --local uploadpack.allowanysha1inwant 1 +' + +test_expect_success 'clone with fetch.blobSizeLimit config applies filter' ' + git -c fetch.blobSizeLimit=1k clone \ + "file://$(pwd)/blob-limit-srv.bare" blob-limit-clone && + + test "$(git -C blob-limit-clone config --local remote.origin.promisor)" = "true" && + test "$(git -C blob-limit-clone config --local remote.origin.partialclonefilter)" = "blob:limit=1024" +' + +test_expect_success 'clone with --filter overrides fetch.blobSizeLimit' ' + git -c fetch.blobSizeLimit=1k clone --filter=blob:none \ + "file://$(pwd)/blob-limit-srv.bare" blob-limit-override && + + test "$(git -C blob-limit-override config --local remote.origin.partialclonefilter)" = "blob:none" +' + +test_expect_success 'fetch with fetch.blobSizeLimit registers promisor remote' ' + git clone --no-checkout "file://$(pwd)/blob-limit-srv.bare" blob-limit-fetch && + + # Sanity: not yet a partial clone + test_must_fail git -C blob-limit-fetch config --local remote.origin.promisor && + + # Add a new commit to the server + echo "new-small" >blob-limit-src/new-small.txt && + dd if=/dev/zero of=blob-limit-src/new-large.bin bs=1024 count=100 2>/dev/null && + git -C blob-limit-src add . 
&& + git -C blob-limit-src commit -m "second" && + git -C blob-limit-src push "file://$(pwd)/blob-limit-srv.bare" main && + + # Fetch with the config set + git -C blob-limit-fetch -c fetch.blobSizeLimit=1k fetch origin && + + test "$(git -C blob-limit-fetch config --local remote.origin.promisor)" = "true" && + test "$(git -C blob-limit-fetch config --local remote.origin.partialclonefilter)" = "blob:limit=1024" +' + +test_expect_success 'fetch.blobSizeLimit does not override existing partialclonefilter' ' + git clone --filter=blob:none \ + "file://$(pwd)/blob-limit-srv.bare" blob-limit-existing && + + test "$(git -C blob-limit-existing config --local remote.origin.partialclonefilter)" = "blob:none" && + + # Fetch with a different blobSizeLimit; existing filter should win + git -C blob-limit-existing -c fetch.blobSizeLimit=1k fetch origin && + + test "$(git -C blob-limit-existing config --local remote.origin.partialclonefilter)" = "blob:none" +' . "$TEST_DIRECTORY"/lib-httpd.sh start_httpd