Improve and mitigate warnings around dataloss when flushing (#9846)

grtlr · abey79 · commit 9b07560221ec · 2025-05-02T18:04:39.000+02:00
### Related * Closes #9818. ### What > [!IMPORTANT] > This PR also changes the way `RecordingStream` is freed in the C/C++ API. Before, we called `stream.disconnect`, which unnecessarily replaced the current sink with a _buffered_ sink that would be immediately dropped afterwards. Not only did this cause spam in the log outputs, it also led to race conditions upon (log) application shutdown. This PR makes it more explicit why we drop data during flushing, by bumping the log messages to `warn!`. It also improves the message by pointing the users to `flush_timeout`. We also bump the default timeout from two seconds to three seconds. It's worth noting that explicitly calling `flush_blocking` from our SDKs should be able to opt out of this timeout, to ensure all data is sent. This will be tracked here: * #9845.
diff --git a/crates/store/re_grpc_client/src/message_proxy/write.rs b/crates/store/re_grpc_client/src/message_proxy/write.rs
@@ -88,26 +88,25 @@ impl Client {
         };
 
         let start = std::time::Instant::now();
+
         loop {
             match rx.try_recv() {
                 Ok(_) => {
-                    re_log::debug!("Flush complete");
+                    re_log::trace!("Flush complete");
                     break;
                 }
                 Err(TryRecvError::Empty) => {
-                    let Some(timeout) = self.flush_timeout else {
-                        std::thread::yield_now();
-                        continue;
-                    };
-
-                    let elapsed = start.elapsed();
-                    if elapsed >= timeout {
-                        re_log::debug!("Flush timed out, not all messages were sent");
-                        break;
+                    if let Some(timeout) = self.flush_timeout {
+                        let elapsed = start.elapsed();
+                        if elapsed >= timeout {
+                            re_log::warn!("Flush timed out, not all messages were sent. The timeout can be adjusted when connecting via gRPC.");
+                            break;
+                        }
                     }
+                    std::thread::yield_now();
                 }
                 Err(TryRecvError::Closed) => {
-                    re_log::debug!("Flush failed, not all messages were sent");
+                    re_log::warn!("Flush failed, not all messages were sent");
                     break;
                 }
             }
@@ -123,7 +122,7 @@ impl Drop for Client {
 
         // Quit immediately - no more messages left in the queue
         if let Err(err) = self.shutdown_tx.try_send(()) {
-            re_log::error!("failed to gracefully shut down message proxy client: {err}");
+            re_log::error!("Failed to gracefully shut down message proxy client: {err}");
             return;
         };
 
diff --git a/crates/top/re_sdk/src/lib.rs b/crates/top/re_sdk/src/lib.rs
@@ -52,7 +52,7 @@ pub fn default_server_addr() -> std::net::SocketAddr {
 #[allow(clippy::unnecessary_wraps)]
 pub fn default_flush_timeout() -> Option<std::time::Duration> {
     // NOTE: This is part of the SDK and meant to be used where we accept `Option<std::time::Duration>` values.
-    Some(std::time::Duration::from_secs(2))
+    Some(std::time::Duration::from_secs(3))
 }
 
 pub use re_log_types::{
diff --git a/crates/top/rerun_c/src/lib.rs b/crates/top/rerun_c/src/lib.rs
@@ -512,7 +512,10 @@ thread_local! {
 pub extern "C" fn rr_recording_stream_free(id: CRecordingStream) {
     if THREAD_LIFE_TRACKER.try_with(|_v| {}).is_ok() {
         if let Some(stream) = RECORDING_STREAMS.lock().remove(id) {
-            stream.disconnect();
+            // Before, we called `stream.disconnect()` here, which unnecessarily replaced the current sink with a
+            // buffered sink that would be immediately dropped afterwards. Not only did this cause spam in the
+            // log outputs, it also led to race conditions upon (log) application shutdown.
+            drop(stream);
         }
     } else {
         // Yes, at least as of writing we can still log things in this state!

Original file line number	Diff line number	Diff line change
`@@ -52,7 +52,7 @@ pub fn default_server_addr() -> std::net::SocketAddr {`
`52`	`52`	`#[allow(clippy::unnecessary_wraps)]`
`53`	`53`	`pub fn default_flush_timeout() -> Option<std::time::Duration> {`
`54`	`54`	// NOTE: This is part of the SDK and meant to be used where we accept `Option<std::time::Duration>` values.
`55`		`- Some(std::time::Duration::from_secs(2))`
	`55`	`+ Some(std::time::Duration::from_secs(3))`
`56`	`56`	`}`
`57`	`57`
`58`	`58`	`pub use re_log_types::{`