Skip to content

CA-412313: don't lose distributed tracing spans when XAPI is shut down #6525

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions ocaml/libs/tracing/tracing_export.ml
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,8 @@ module Destination = struct
(* [signal ()] signals [delay]. Note this signal will flush the spans and
   terminate the exporter thread. *)
let signal () = Delay.signal delay

(* Signalled by the exporter thread once its export loop has finished, so that
   a caller can wait (with a timeout) for the final flush to complete. *)
let wait_exit = Delay.make ()

let create_exporter () =
enable_span_garbage_collector () ;
Thread.create
Expand All @@ -319,7 +321,8 @@ module Destination = struct
signaled := true
) ;
flush_spans ()
done
done ;
Delay.signal wait_exit
)
()

Expand All @@ -339,6 +342,12 @@ module Destination = struct
)
end

let flush_and_exit = Destination.signal
(* [flush_and_exit ~max_wait ()] asks the exporter thread to export its spans
   and stop, then blocks on [Destination.wait_exit] for up to [max_wait].
   The outcome is only logged; the function returns [unit] either way. *)
let flush_and_exit ~max_wait () =
  D.debug "flush_and_exit: signaling thread to export now" ;
  Destination.signal () ;
  (* A [true] result from the wait is treated as "the timeout elapsed". *)
  match Delay.wait Destination.wait_exit max_wait with
  | true ->
      D.info "flush_and_exit: timeout on span export"
  | false ->
      D.debug "flush_and_exit: span export finished"

(* Top-level alias for [Destination.main]. *)
let main = Destination.main
6 changes: 3 additions & 3 deletions ocaml/libs/tracing/tracing_export.mli
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,9 @@ module Destination : sig
end
end

val flush_and_exit : unit -> unit
(** [flush_and_exit ()] sends a signal to flush the finish spans and terminate
the exporter thread.
val flush_and_exit : max_wait:float -> unit -> unit
(** [flush_and_exit ~max_wait ()] signals the exporter thread to flush the
    finished spans and terminate, then waits at most [max_wait] seconds for
    that export to complete.
*)

val main : unit -> Thread.t
Expand Down
4 changes: 2 additions & 2 deletions ocaml/tests/bench/bench_tracing.ml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ let export_thread =
(* need to ensure this isn't running outside the benchmarked section,
or bechamel might fail with 'Failed to stabilize GC'
*)
let after _ = Tracing_export.flush_and_exit () in
let after _ = Tracing_export.flush_and_exit ~max_wait:0. () in
Bechamel_simple_cli.thread_workload ~before:Tracing_export.main ~after
~run:ignore

Expand All @@ -52,7 +52,7 @@ let allocate () =

let free t =
Tracing.TracerProvider.destroy ~uuid ;
Tracing_export.flush_and_exit () ;
Tracing_export.flush_and_exit ~max_wait:0. () ;
Thread.join t

let test_tracing_on ?(overflow = false) ~name f =
Expand Down
2 changes: 2 additions & 0 deletions ocaml/xapi/xapi_fuse.ml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ let light_fuse_and_run ?(fuse_length = !Constants.fuse_time) () =
in
let new_fuse_length = max 5. (fuse_length -. delay_so_far) in
debug "light_fuse_and_run: current RRDs have been saved" ;
ignore
(Thread.create Tracing_export.(flush_and_exit ~max_wait:new_fuse_length) ()) ;
ignore
(Thread.create
(fun () ->
Expand Down
29 changes: 12 additions & 17 deletions ocaml/xs-trace/dune
Original file line number Diff line number Diff line change
@@ -1,23 +1,18 @@
(executable
(modes exe)
(name xs_trace)
(public_name xs-trace)
(package xapi-tools)
(libraries
uri
tracing
cmdliner
tracing_export
xapi-stdext-unix
zstd
)
)
(modes exe)
(name xs_trace)
(public_name xs-trace)
(package xapi-tools)
(libraries uri tracing cmdliner tracing_export yojson xapi-stdext-unix zstd))

(rule
(targets xs-trace.1)
(deps (:exe xs_trace.exe))
(action (with-stdout-to %{targets} (run %{exe} --help=groff)))
)
(targets xs-trace.1)
(deps
(:exe xs_trace.exe))
(action
(with-stdout-to
%{targets}
(run %{exe} --help=groff))))

; not expected by the specfile
;(install
Expand Down
32 changes: 23 additions & 9 deletions ocaml/xs-trace/xs_trace.ml
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,7 @@ module Exporter = struct
| _ ->
()

(** Export traces from file system to a remote endpoint. *)
let export erase src dst =
let dst = Uri.of_string dst in
let submit_json = submit_json dst in
let iter_src src f =
let rec export_file = function
| path when Sys.is_directory path ->
(* Recursively export trace files. *)
Expand All @@ -38,7 +35,7 @@ module Exporter = struct
(* Decompress compressed trace file and submit each line iteratively *)
let args = [|"zstdcat"; path|] in
let ic = Unix.open_process_args_in args.(0) args in
Unixext.lines_iter submit_json ic ;
Unixext.lines_iter f ic ;
match Unix.close_process_in ic with
| Unix.WEXITED 0 ->
()
Expand All @@ -47,15 +44,27 @@ module Exporter = struct
)
| path when Filename.check_suffix path ".ndjson" ->
(* Submit traces line by line. *)
Unixext.readfile_line submit_json path
Unixext.readfile_line f path
| path ->
(* Assume any other extension is a valid JSON file. *)
let json = Unixext.string_of_file path in
submit_json json
f json
in
export_file src ;
export_file src

(** [export erase src dst] submits every trace found under [src] to the
    endpoint whose URI is given by the string [dst]. When [erase] is [true],
    [src] is removed recursively (including [src] itself) afterwards. *)
let export erase src dst =
  let endpoint = Uri.of_string dst in
  iter_src src (submit_json endpoint) ;
  if erase then Unixext.rm_rec ~rm_top:true src

(** [pretty_print src] parses each JSON document produced by [iter_src src]
    and writes it to [stdout] in indented form.

    A newline is written after every document: the pretty-printer itself
    does not appear to terminate its output with one, so without it the
    records of an NDJSON file would run into each other on output.

    Parsing errors raised by [Yojson.Safe.from_string] are not caught here. *)
let pretty_print src =
  iter_src src @@ fun line ->
  line
  |> Yojson.Safe.from_string
  |> Yojson.Safe.pretty_to_channel ~std:true stdout ;
  (* Separate consecutive documents so that the output stays readable. *)
  output_char stdout '\n'
end

module Cli = struct
Expand Down Expand Up @@ -83,6 +92,11 @@ module Cli = struct
let doc = "copy a trace to an endpoint and erase it afterwards" in
Cmd.(v (info "mv" ~doc) term)

(* The [pp] sub-command: runs [Exporter.pretty_print] on the [src] argument. *)
let pp_cmd =
  let doc = "Pretty print NDJSON traces" in
  let pp_term = Term.(const Exporter.pretty_print $ src) in
  Cmd.v (Cmd.info "pp" ~doc) pp_term

let xs_trace_cmd =
let man =
[
Expand All @@ -94,7 +108,7 @@ module Cli = struct
let doc = "utility for working with local trace files" in
Cmd.info "xs-trace" ~doc ~version:"0.1" ~man
in
Cmd.group desc [cp_cmd; mv_cmd]
Cmd.group desc [cp_cmd; mv_cmd; pp_cmd]

(* Entry point: evaluates the [xs-trace] command group with [Cmd.eval]. *)
let main () = Cmd.eval xs_trace_cmd
end
Expand Down
Loading