Skip to content

Commit 223fa04

Browse files
committed
Use URLs as default canonical IDs in common repo rules
RELNOTES[INC]: The `http_archive`, `http_file`, `http_jar`, `jvm_maven_import_external`, and `jvm_import_external` repository rules now use the URLs as the default canonical ID. If this behavior is not desired, it can be disabled via `--repo_env=BAZEL_NO_DEFAULT_CANONICAL_ID=1`. The `--experimental_repository_cache_urls_as_default_canonical_id` flag is now a no-op.
1 parent 358bec8 commit 223fa04

File tree

14 files changed

+128
-57
lines changed

14 files changed

+128
-57
lines changed

scripts/bootstrap/bootstrap.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,14 @@ fi
3131

3232
: ${JAVA_VERSION:="11"}
3333

34+
# TODO: remove `--repo_env=BAZEL_NO_DEFAULT_CANONICAL_ID=1` once all dependencies are mirrored.
35+
# See https://github.com/bazelbuild/bazel/pull/19549 for more context.
3436
_BAZEL_ARGS="--spawn_strategy=standalone \
3537
--nojava_header_compilation \
3638
--strategy=Javac=worker --worker_quit_after_build --ignore_unsupported_sandboxing \
3739
--compilation_mode=opt \
3840
--repository_cache=derived/repository_cache \
41+
--repo_env=BAZEL_NO_DEFAULT_CANONICAL_ID=1 \
3942
--extra_toolchains=//scripts/bootstrap:all \
4043
--extra_toolchains=@bazel_tools//tools/python:autodetecting_toolchain \
4144
--enable_bzlmod \

src/main/java/com/google/devtools/build/lib/bazel/BazelRepositoryModule.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,6 @@ public void beforeCommand(CommandEnvironment env) throws AbruptExitException {
345345
if (repoOptions.repositoryDownloaderRetries >= 0) {
346346
downloadManager.setRetries(repoOptions.repositoryDownloaderRetries);
347347
}
348-
downloadManager.setUrlsAsDefaultCanonicalId(repoOptions.urlsAsDefaultCanonicalId);
349348

350349
repositoryCache.setHardlink(repoOptions.useHardlinks);
351350
if (repoOptions.experimentalScaleTimeouts > 0.0) {

src/main/java/com/google/devtools/build/lib/bazel/repository/RepositoryOptions.java

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -265,14 +265,10 @@ public Converter() {
265265
@Option(
266266
name = "experimental_repository_cache_urls_as_default_canonical_id",
267267
defaultValue = "false",
268-
documentationCategory = OptionDocumentationCategory.BAZEL_CLIENT_OPTIONS,
269-
effectTags = {OptionEffectTag.LOADING_AND_ANALYSIS},
270-
metadataTags = {OptionMetadataTag.EXPERIMENTAL},
271-
help =
272-
"If true, use a string derived from the URLs of repository downloads as the canonical_id "
273-
+ "if not specified. This causes a change in the URLs to result in a redownload even "
274-
+ "if the cache contains a download with the same hash. This can be used to verify "
275-
+ "that URL changes don't result in broken repositories being masked by the cache.")
268+
documentationCategory = OptionDocumentationCategory.UNDOCUMENTED,
269+
metadataTags = OptionMetadataTag.DEPRECATED,
270+
effectTags = {OptionEffectTag.NO_OP},
271+
help = "No-op.")
276272
public boolean urlsAsDefaultCanonicalId;
277273

278274
@Option(

src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/DownloadManager.java

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@ public class DownloadManager {
5959
private final Downloader downloader;
6060
private boolean disableDownload = false;
6161
private int retries = 0;
62-
private boolean urlsAsDefaultCanonicalId;
6362
@Nullable private Credentials netrcCreds;
6463
private CredentialFactory credentialFactory = StaticCredentials::new;
6564

@@ -90,10 +89,6 @@ public void setRetries(int retries) {
9089
this.retries = retries;
9190
}
9291

93-
public void setUrlsAsDefaultCanonicalId(boolean urlsAsDefaultCanonicalId) {
94-
this.urlsAsDefaultCanonicalId = urlsAsDefaultCanonicalId;
95-
}
96-
9792
public void setNetrcCreds(Credentials netrcCreds) {
9893
this.netrcCreds = netrcCreds;
9994
}
@@ -134,9 +129,6 @@ public Path download(
134129
if (Thread.interrupted()) {
135130
throw new InterruptedException();
136131
}
137-
if (Strings.isNullOrEmpty(canonicalId) && urlsAsDefaultCanonicalId) {
138-
canonicalId = originalUrls.stream().map(URL::toExternalForm).collect(Collectors.joining(" "));
139-
}
140132

141133
// TODO(andreisolo): This code path is inconsistent as the authHeaders are fetched from a
142134
// .netrc only if it comes from a http_{archive,file,jar} - and it is handled directly

src/test/java/com/google/devtools/build/lib/blackbox/bazel/DefaultToolsSetup.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,9 @@ public void setup(BlackBoxTestContext context) throws IOException {
7878
String sharedRepoCache = System.getenv("REPOSITORY_CACHE");
7979
if (sharedRepoCache != null) {
8080
lines.add("common --repository_cache=" + sharedRepoCache);
81+
// TODO: Remove this flag once all dependencies are mirrored.
82+
// See https://github.com/bazelbuild/bazel/pull/19549 for more context.
83+
lines.add("common --repo_env=BAZEL_NO_DEFAULT_CANONICAL_ID=1");
8184
if (OS.getCurrent() == OS.DARWIN) {
8285
// For reducing SSD usage on our physical Mac machines.
8386
lines.add("common --experimental_repository_cache_hardlinks");

src/test/py/bazel/bzlmod/bazel_fetch_test.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,10 @@ def generatBuiltinModules(self):
6161
self.ScratchFile('tools_mock/WORKSPACE')
6262
self.ScratchFile('tools_mock/MODULE.bazel', ['module(name="bazel_tools")'])
6363
self.ScratchFile('tools_mock/tools/build_defs/repo/BUILD')
64+
self.CopyFile(
65+
self.Rlocation('io_bazel/tools/build_defs/repo/cache.bzl'),
66+
'tools_mock/tools/build_defs/repo/cache.bzl',
67+
)
6468
self.CopyFile(
6569
self.Rlocation('io_bazel/tools/build_defs/repo/http.bzl'),
6670
'tools_mock/tools/build_defs/repo/http.bzl',

src/test/py/bazel/test_base.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,9 @@ def setUp(self):
127127
shared_repo_cache = os.environ.get('REPOSITORY_CACHE')
128128
if shared_repo_cache:
129129
f.write('common --repository_cache={}\n'.format(shared_repo_cache))
130+
# TODO: Remove this flag once all dependencies are mirrored.
131+
# See https://github.com/bazelbuild/bazel/pull/19549 for more context.
132+
f.write('common --repo_env=BAZEL_NO_DEFAULT_CANONICAL_ID=1\n')
130133
if TestBase.IsDarwin():
131134
# For reducing SSD usage on our physical Mac machines.
132135
f.write('common --experimental_repository_cache_hardlinks\n')

src/test/shell/bazel/bazel_repository_cache_test.sh

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,9 @@ function test_fetch_value_with_existing_cache_and_no_network() {
221221
cache_entry="$repo_cache_dir/content_addressable/sha256/$sha256"
222222
mkdir -p "$cache_entry"
223223
cp "$repo2_zip" "$cache_entry/file" # Artifacts are named uniformly as "file" in the cache
224+
http_archive_url="http://localhost:$nc_port/bleh"
225+
canonical_id_hash=$(printf "$http_archive_url" | sha256sum | cut -f 1 -d ' ')
226+
touch "$cache_entry/id-$canonical_id_hash"
224227

225228
# Fetch without a server
226229
shutdown_server
@@ -271,6 +274,7 @@ EOF
271274
# to do without checksum. But we can safely do so, as the loopback device
272275
# is reasonably safe against man-in-the-middle attacks.
273276
bazel fetch --repository_cache="$repo_cache_dir" \
277+
--repo_env=BAZEL_NO_DEFAULT_CANONICAL_ID=1 \
274278
//zoo:breeding-program >& $TEST_log \
275279
|| fail "expected fetch to succeed"
276280

@@ -283,6 +287,7 @@ EOF
283287

284288
# As we don't have a predicted cache, we expect fetching to fail now.
285289
bazel fetch --repository_cache="$repo_cache_dir" //zoo:breeding-program >& $TEST_log \
290+
--repo_env=BAZEL_NO_DEFAULT_CANONICAL_ID=1 \
286291
&& fail "expected failure" || :
287292

288293
# However, if we add the hash, the value is taken from cache
@@ -298,6 +303,7 @@ http_archive(
298303
)
299304
EOF
300305
bazel fetch --repository_cache="$repo_cache_dir" //zoo:breeding-program >& $TEST_log \
306+
--repo_env=BAZEL_NO_DEFAULT_CANONICAL_ID=1 \
301307
|| fail "expected fetch to succeed"
302308
}
303309

@@ -465,15 +471,19 @@ EOF
465471
expect_log "Error downloading"
466472
}
467473

468-
function test_break_url() {
474+
function test_http_archive_no_default_canonical_id() {
469475
setup_repository
470476

471-
bazel fetch --repository_cache="$repo_cache_dir" //zoo:breeding-program >& $TEST_log \
477+
bazel fetch --repository_cache="$repo_cache_dir" \
478+
--repo_env=BAZEL_NO_DEFAULT_CANONICAL_ID=1 \
479+
//zoo:breeding-program >& $TEST_log \
472480
|| echo "Expected fetch to succeed"
473481

474482
shutdown_server
475483

476-
bazel fetch --repository_cache="$repo_cache_dir" //zoo:breeding-program >& $TEST_log \
484+
bazel fetch --repository_cache="$repo_cache_dir" \
485+
--repo_env=BAZEL_NO_DEFAULT_CANONICAL_ID=1 \
486+
//zoo:breeding-program >& $TEST_log \
477487
|| echo "Expected fetch to succeed"
478488

479489
# Break url in WORKSPACE
@@ -489,24 +499,24 @@ http_archive(
489499
)
490500
EOF
491501

492-
# By default, cache entry will still match by sha256, even if url is changed.
493-
bazel fetch --repository_cache="$repo_cache_dir" //zoo:breeding-program >& $TEST_log \
502+
# Without the default canonical id, cache entry will still match by sha256, even if url is
503+
# changed.
504+
bazel fetch --repository_cache="$repo_cache_dir" \
505+
--repo_env=BAZEL_NO_DEFAULT_CANONICAL_ID=1 \
506+
//zoo:breeding-program >& $TEST_log \
494507
|| echo "Expected fetch to succeed"
495508
}
496509

497-
function test_experimental_repository_cache_urls_as_default_canonical_id() {
510+
511+
function test_http_archive_urls_as_default_canonical_id() {
498512
setup_repository
499513

500-
bazel fetch --repository_cache="$repo_cache_dir" \
501-
--experimental_repository_cache_urls_as_default_canonical_id \
502-
//zoo:breeding-program >& $TEST_log \
514+
bazel fetch --repository_cache="$repo_cache_dir" //zoo:breeding-program >& $TEST_log \
503515
|| echo "Expected fetch to succeed"
504516

505517
shutdown_server
506518

507-
bazel fetch --repository_cache="$repo_cache_dir" \
508-
--experimental_repository_cache_urls_as_default_canonical_id \
509-
//zoo:breeding-program >& $TEST_log \
519+
bazel fetch --repository_cache="$repo_cache_dir" //zoo:breeding-program >& $TEST_log \
510520
|| echo "Expected fetch to succeed"
511521

512522
# Break url in WORKSPACE
@@ -522,10 +532,10 @@ http_archive(
522532
)
523533
EOF
524534

535+
# TODO: Remove when the integration test setup itself no longer relies on this.
536+
unset BAZEL_NO_DEFAULT_CANONICAL_ID
525537
# As repository cache key should depend on urls, we expect fetching to fail now.
526-
bazel fetch --repository_cache="$repo_cache_dir" \
527-
--experimental_repository_cache_urls_as_default_canonical_id \
528-
//zoo:breeding-program >& $TEST_log \
538+
bazel fetch --repository_cache="$repo_cache_dir" //zoo:breeding-program >& $TEST_log \
529539
&& fail "expected failure" || :
530540
}
531541

src/test/shell/testenv.sh.tmpl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,9 @@ EOF
327327
if [[ -n ${REPOSITORY_CACHE:-} ]]; then
328328
echo "testenv.sh: Using repository cache at $REPOSITORY_CACHE."
329329
echo "common --repository_cache=$REPOSITORY_CACHE" >> $TEST_TMPDIR/bazelrc
330+
# TODO: Remove this flag once all dependencies are mirrored.
331+
# See https://github.com/bazelbuild/bazel/pull/19549 for more context.
332+
echo "common --repo_env=BAZEL_NO_DEFAULT_CANONICAL_ID=1" >> $TEST_TMPDIR/bazelrc
330333
if is_darwin; then
331334
# For reducing SSD usage on our physical Mac machines.
332335
echo "testenv.sh: Enabling --experimental_repository_cache_hardlinks"

src/test/tools/bzlmod/MODULE.bazel.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tools/build_defs/repo/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ filegroup(
1515
filegroup(
1616
name = "http_src",
1717
srcs = [
18+
"cache.bzl",
1819
"http.bzl",
1920
"utils.bzl",
2021
],

tools/build_defs/repo/cache.bzl

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Copyright 2023 The Bazel Authors. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# WARNING:
16+
# https://github.com/bazelbuild/bazel/issues/17713
17+
# .bzl files in this package (tools/build_defs/repo) are evaluated
18+
# in a Starlark environment without "@_builtins" injection, and must not refer
19+
# to symbols associated with build/workspace .bzl files
20+
21+
visibility("private")
22+
23+
NO_DEFAULT_CANONICAL_ID_ENV = "BAZEL_NO_DEFAULT_CANONICAL_ID"
24+
25+
CANONICAL_ID_DOC = """A canonical ID of the file downloaded.
26+
27+
If specified and non-empty, Bazel will not take the file from cache, unless it
28+
was added to the cache by a request with the same canonical ID.
29+
30+
If unspecified or empty, Bazel by default uses the URLs of the file as the
31+
canonical ID. This helps catch the common mistake of updating the URLs without
32+
also updating the hash, resulting in builds that succeed locally but fail on
33+
machines without the file in the cache. This behavior can be disabled with
34+
--repo_env={env}=1.
35+
""".format(env = NO_DEFAULT_CANONICAL_ID_ENV)
36+
37+
def get_default_canonical_id(repository_ctx, urls):
38+
"""Returns the default canonical id to use for downloads."""
39+
if repository_ctx.os.environ.get(NO_DEFAULT_CANONICAL_ID_ENV) == "1":
40+
return ""
41+
42+
# Do not sort URLs to prevent the following scenario:
43+
# 1. http_archive with urls = [B, A] created.
44+
# 2. Successful fetch from B results in canonical ID "A B".
45+
# 3. Order of urls is flipped to [A, B].
46+
# 4. Fetch would reuse cache entry for "A B", even though A may be broken (it has never been
47+
# fetched before).
48+
return " ".join(urls)

0 commit comments

Comments
 (0)