Skip to content

Commit 04fdc9c

Browse files
authored
Merge pull request ocaml#10534 from gridbugs/stop-treating-toolchains-as-packages-in-pkg-rules
Stop treating toolchains as packages in pkg rules
2 parents 98efedf + c740f6a commit 04fdc9c

File tree

7 files changed

+347
-242
lines changed

7 files changed

+347
-242
lines changed

src/dune_engine/scheduler.mli

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -143,9 +143,9 @@ val cancel_current_build : unit -> unit Fiber.t
143143

144144
val inject_memo_invalidation : Memo.Invalidation.t -> unit Fiber.t
145145

146-
(** [sleep duration] wait for [duration] to elapse. Sleepers are checked for
147-
wake up at a rate of once per 0.1 seconds. So [duration] should be at least
148-
this long. *)
146+
(** [sleep duration] wait for [duration] seconds to elapse. Sleepers
147+
are checked for wake up at a rate of once per 0.1 seconds. So
148+
[duration] should be at least this long. *)
149149
val sleep : float -> unit Fiber.t
150150

151151
val stats : unit -> Dune_stats.t option Fiber.t

src/dune_pkg/flock.ml

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
open! Stdune
2+
module Flock = Dune_util.Flock
3+
module Scheduler = Dune_engine.Scheduler
4+
5+
type t =
6+
{ flock : Flock.t
7+
; lock_path : Path.t
8+
}
9+
10+
(* Global mutable set of names, used to prevent printing "waiting for
11+
lock" messages multiple times when multiple concurrent fibers try
12+
to take a lock at the same time. *)
13+
module Global_waiting_names = struct
14+
let state = lazy (String.Table.create 1)
15+
16+
(* add a name to the set, returning [true] iff the name wasn't
17+
already in the set *)
18+
let add name =
19+
let state = Lazy.force state in
20+
String.Table.add state name () |> Result.is_ok
21+
;;
22+
23+
let remove name =
24+
let state = Lazy.force state in
25+
String.Table.remove state name
26+
;;
27+
end
28+
29+
let attempt_to_lock { flock; lock_path } ~name_for_messages ~timeout_s =
30+
let open Fiber.O in
31+
let current_dune_pid = Unix.getpid () in
32+
let rec loop timeout_s =
33+
match Flock.lock_non_block flock Flock.Exclusive with
34+
| Error e -> Fiber.return @@ Error e
35+
| Ok `Success ->
36+
Global_waiting_names.remove name_for_messages;
37+
Fiber.return (Ok `Success)
38+
| Ok `Failure -> handle_failure timeout_s
39+
and handle_failure timeout_s =
40+
let locked_by_pid = int_of_string (Io.read_file lock_path) in
41+
let sleep_duration_s = 0.1 in
42+
let remaining_duration_s = timeout_s -. sleep_duration_s in
43+
if remaining_duration_s <= 0.0
44+
then Fiber.return (Ok `Timeout)
45+
else (
46+
if locked_by_pid <> current_dune_pid && Global_waiting_names.add name_for_messages
47+
then
48+
(* Only print this message if the dune process that holds the
49+
lock isn't the current process and this is the first time
50+
that the current process has failed to take the lock since
51+
the last time it successfully took the lock. This prevents
52+
the situation where multiple fibers all attempt to take the
53+
lock concurrently while it's held by another process from
54+
causing the following message from being printed multiple
55+
times. *)
56+
User_message.print
57+
(User_message.make
58+
[ Pp.textf
59+
"Waiting for another instance of dune (pid %d) to release the lock for \
60+
the resource %S..."
61+
locked_by_pid
62+
name_for_messages
63+
]);
64+
let* () = Scheduler.sleep sleep_duration_s in
65+
loop remaining_duration_s)
66+
in
67+
loop timeout_s
68+
;;
69+
70+
let with_flock lock_path ~name_for_messages ~timeout_s ~f =
71+
let open Fiber.O in
72+
let parent = Path.parent_exn lock_path in
73+
Path.mkdir_p parent;
74+
let fd =
75+
Unix.openfile
76+
(Path.to_string lock_path)
77+
[ Unix.O_CREAT; O_WRONLY; O_SHARE_DELETE; O_CLOEXEC ]
78+
0o600
79+
in
80+
let out = Unix.out_channel_of_descr fd in
81+
let flock = Flock.create fd in
82+
let current_dune_pid = Unix.getpid () in
83+
Fiber.finalize
84+
~finally:(fun () ->
85+
let+ () = Fiber.return () in
86+
close_out out)
87+
(fun () ->
88+
attempt_to_lock { flock; lock_path } ~name_for_messages ~timeout_s
89+
>>= function
90+
| Ok `Success ->
91+
Fiber.finalize
92+
(fun () ->
93+
Printf.fprintf out "%d%!" current_dune_pid;
94+
f ())
95+
~finally:(fun () ->
96+
let+ () = Fiber.return () in
97+
match Flock.unlock flock with
98+
| Ok () ->
99+
(* Note that after the lock has been released, we
100+
deliberately don't delete the lock file to avoid a race
101+
condition where other processes or fibers still need to
102+
read the file to determine the process that held the
103+
lock. Even though the lock has been released, other
104+
parties may be in between timing out waiting for the
105+
lock and reading the lock file to get the pid to
106+
include in their error message. *)
107+
()
108+
| Error ue ->
109+
Unix_error.Detailed.create ue ~syscall:"flock" ~arg:"unlock"
110+
|> Unix_error.Detailed.raise)
111+
| Ok `Timeout ->
112+
let locked_by_pid = int_of_string (Io.read_file lock_path) in
113+
if locked_by_pid == current_dune_pid
114+
then
115+
Code_error.raise
116+
"timeout while waiting for flock, but flock was currently held by the \
117+
current process"
118+
[ "name_for_messages", Dyn.string name_for_messages ]
119+
else
120+
User_error.raise
121+
~hints:
122+
[ Pp.textf
123+
"Another instance of dune (pid %d) currently holds the lock for the \
124+
resource %S. If this is unexpected, terminate that process and re-run \
125+
the command."
126+
locked_by_pid
127+
name_for_messages
128+
; Pp.textf
129+
"As a last resort, if the other instance of dune (pid %d) is no longer \
130+
running, manually delete the lock file %s."
131+
locked_by_pid
132+
(Path.to_string_maybe_quoted lock_path)
133+
]
134+
[ Pp.textf
135+
"Timed out after %.2f seconds while waiting for another instance of dune \
136+
(pid %d) to release the lock on the resource %S."
137+
timeout_s
138+
locked_by_pid
139+
name_for_messages
140+
]
141+
| Error error ->
142+
User_error.raise
143+
[ Pp.textf
144+
"Failed to get a lock for the resource %S with lock file %s: %s"
145+
name_for_messages
146+
(Path.to_string_maybe_quoted lock_path)
147+
(Unix.error_message error)
148+
])
149+
;;

src/dune_pkg/flock.mli

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
open! Stdune
2+
3+
(** [with_flock path ~name_for_messages ~timeout_s ~f] ensures mutual
4+
exclusion for the function [f] across multiple concurrent
5+
instances of dune, using the lock file at [path] to coordinate
6+
between different dune instances. If the lock is not acquired
7+
after [timeout_s] seconds then a [User_error] is raised. Pass
8+
[infinity] to keep trying to take the lock
9+
forever. [name_for_messages] is the name used to refer to this
10+
lock in error messages.
11+
12+
Within the a dune process, this function also ensures mutual
13+
exclusion between fibers. Note that if this function times out
14+
waiting for the lock while the lock is held by a different fiber
15+
of the same dune process, a [Code_error] is raised rather than a
16+
[User_error]. If a timeout is possible, avoid allowing multiple
17+
fibers to concurrently attempt to take a flock. *)
18+
val with_flock
19+
: Path.t
20+
-> name_for_messages:string
21+
-> timeout_s:float
22+
-> f:(unit -> 'a Fiber.t)
23+
-> 'a Fiber.t

src/dune_pkg/rev_store.ml

Lines changed: 1 addition & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ module Process = Dune_engine.Process
44
module Display = Dune_engine.Display
55
module Scheduler = Dune_engine.Scheduler
66
module Re = Dune_re
7-
module Flock = Dune_util.Flock
87
open Fiber.O
98

109
module Object = struct
@@ -62,73 +61,7 @@ let lock_path { dir; _ } =
6261
Path.relative parent "rev-store.lock"
6362
;;
6463

65-
let rec attempt_to_lock flock lock ~max_retries =
66-
let sleep_duration = 0.1 in
67-
match Flock.lock_non_block flock lock with
68-
| Error e -> Fiber.return @@ Error e
69-
| Ok `Success -> Fiber.return (Ok `Success)
70-
| Ok `Failure ->
71-
if max_retries > 0
72-
then
73-
let* () = Scheduler.sleep sleep_duration in
74-
attempt_to_lock flock lock ~max_retries:(max_retries - 1)
75-
else Fiber.return (Ok `Failure)
76-
;;
77-
78-
let with_flock lock_path ~f =
79-
let open Fiber.O in
80-
let parent = Path.parent_exn lock_path in
81-
Path.mkdir_p parent;
82-
let fd =
83-
Unix.openfile
84-
(Path.to_string lock_path)
85-
[ Unix.O_CREAT; O_WRONLY; O_SHARE_DELETE; Unix.O_CLOEXEC ]
86-
0o600
87-
in
88-
let out = Unix.out_channel_of_descr fd in
89-
let flock = Flock.create fd in
90-
let max_retries = 49 in
91-
Fiber.finalize
92-
~finally:(fun () ->
93-
let+ () = Fiber.return () in
94-
close_out out)
95-
(fun () ->
96-
attempt_to_lock flock Flock.Exclusive ~max_retries
97-
>>= function
98-
| Ok `Success ->
99-
Fiber.finalize
100-
(fun () ->
101-
Printf.fprintf out "%d\n%!" (Unix.getpid ());
102-
f ())
103-
~finally:(fun () ->
104-
let+ () = Fiber.return () in
105-
Path.unlink_no_err lock_path;
106-
match Flock.unlock flock with
107-
| Ok () -> ()
108-
| Error ue ->
109-
Unix_error.Detailed.create ue ~syscall:"flock" ~arg:"unlock"
110-
|> Unix_error.Detailed.raise)
111-
| Ok `Failure ->
112-
let pid = Io.read_file lock_path in
113-
User_error.raise
114-
~hints:
115-
[ Pp.textf
116-
"Another dune instance (pid %s) has locked the revision store. If this \
117-
is happening in error, make sure to terminate that instance and re-run \
118-
the command."
119-
pid
120-
]
121-
[ Pp.textf "Couldn't acquire revision store lock after %d attempts" max_retries
122-
]
123-
| Error error ->
124-
User_error.raise
125-
[ Pp.textf
126-
"Failed to get a lock for the revision store at %s: %s"
127-
(Path.to_string_maybe_quoted lock_path)
128-
(Unix.error_message error)
129-
])
130-
;;
131-
64+
let with_flock = Flock.with_flock ~name_for_messages:"revision store" ~timeout_s:5.0
13265
let failure_mode = Process.Failure_mode.Return
13366
let output_limit = Sys.max_string_length
13467
let make_stdout () = Process.Io.make_stdout ~output_on_success:Swallow ~output_limit

src/dune_pkg/toolchain.ml

Lines changed: 55 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,10 @@ module Compiler_package = struct
9797
:: ([ "ocaml-system"; "ocaml-variants" ] |> List.map ~f:Package_name.of_string)
9898
;;
9999

100+
let is_compiler_package_by_name name =
101+
List.exists ~f:(Package_name.equal name) package_names
102+
;;
103+
100104
let constraint_ =
101105
let open Dune_lang in
102106
let constraint_ =
@@ -181,6 +185,10 @@ module Version = struct
181185

182186
let bin_dir t = Path.Outside_build_dir.relative (target_dir t) "bin"
183187
let is_installed t = Path.exists (Path.outside_build_dir (target_dir t))
188+
189+
let flock_path t =
190+
Path.Outside_build_dir.relative (toolchain_dir t) "lock" |> Path.outside_build_dir
191+
;;
184192
end
185193

186194
let handle_checksum_mismatch { Compiler_package.version; url; checksum } ~got_checksum =
@@ -285,18 +293,17 @@ let get ~log version =
285293
| `Never -> ()
286294
| _ -> User_message.print (User_message.make [ Pp.tag style pp ])
287295
in
288-
if Version.is_installed version
289-
then (
290-
(match log with
291-
| `Always ->
292-
log_print Success
293-
@@ Pp.textf
294-
"Version %s of the compiler toolchain is already installed in %s"
295-
(Version.to_string version)
296-
(Version.target_dir version |> Path.Outside_build_dir.to_string)
297-
| _ -> ());
298-
Fiber.return ())
299-
else (
296+
let print_already_installed_message () =
297+
match log with
298+
| `Always ->
299+
log_print Success
300+
@@ Pp.textf
301+
"Version %s of the compiler toolchain is already installed in %s"
302+
(Version.to_string version)
303+
(Version.target_dir version |> Path.Outside_build_dir.to_string)
304+
| _ -> ()
305+
in
306+
let download_build_install () =
300307
let compiler_package = Compiler_package.of_version version in
301308
log_print Details
302309
@@ Pp.textf
@@ -315,5 +322,40 @@ let get ~log version =
315322
@@ Pp.textf
316323
"Success! Compiler toolchain version %s installed to %s."
317324
(Version.to_string version)
318-
(Version.target_dir version |> Path.Outside_build_dir.to_string))
325+
(Version.target_dir version |> Path.Outside_build_dir.to_string)
326+
in
327+
if Version.is_installed version
328+
then (
329+
print_already_installed_message ();
330+
Fiber.return ())
331+
else
332+
Flock.with_flock
333+
(Version.flock_path version)
334+
~name_for_messages:(sprintf "toolchain version %s" (Version.to_string version))
335+
~timeout_s:infinity
336+
~f:(fun () ->
337+
(* Note that we deliberately check if the toolchain is
338+
installed before and after taking the flock.
339+
340+
The first check prevents us from trying to take the lock if
341+
the toolchain is installed. To build any package dune first
342+
checks if the necessary toolchain is installed, so to build
343+
a project with many dependencies, dune will check if the
344+
toolchain is installed many times. If this check required
345+
first taking a lock, multiple concurrent dune instances
346+
would sometimes contest the lock. This isn't too bad for
347+
performance as the lock is only held briefly, but when dune
348+
waits on a flock it prints a message, so freqeunt, brief
349+
lock acquisitions can lead to a lot of noise in the
350+
output.
351+
352+
The second check is to handle the case where the toolchain
353+
was installed in between the first check, and the flock
354+
being acquired.
355+
*)
356+
if Version.is_installed version
357+
then (
358+
print_already_installed_message ();
359+
Fiber.return ())
360+
else download_build_install ())
319361
;;

src/dune_pkg/toolchain.mli

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ module Compiler_package : sig
88
used instead. *)
99
val package_names : Package_name.t list
1010

11+
val is_compiler_package_by_name : Package_name.t -> bool
12+
1113
(** Constraint to apply to the dependency solver to guarantee a
1214
solution that's includes a version of a compiler package that's
1315
supported by dune toolchains. *)

0 commit comments

Comments
 (0)