Skip to content

Commit 1651050

Browse files
authored
[ycabled] add some retry logic for gRPC channel setup; fix no-channel gRPC notification (sonic-net#269)
This PR adds retry logic for setting up gRPC channels, with a longer timeout for each attempt. It also fixes the gRPC notification handling for requests going from appl DB:HW_MUX_CABLE_TABLE to state DB:HW_MUX_CABLE_TABLE: previously, if the channel was not set up, the daemon did not give back a response; with this change, that condition is handled properly. Finally, this PR enhances the channel so that keepalive messages are exchanged between the server/SoC and ycabled while the connection is idle. This enhancement is based on the gRPC keepalive feature described at https://github.com/grpc/grpc/blob/master/doc/keepalive.md. Signed-off-by: vaibhav-dahiya <[email protected]>
1 parent 7c0be24 commit 1651050

File tree

1 file changed

+33
-12
lines changed

1 file changed

+33
-12
lines changed

sonic-ycabled/ycable/ycable_utilities/y_cable_helper.py

+33-12
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
SELECT_TIMEOUT = 1000
3131

3232
#gRPC timeouts for RPC
33-
QUERY_ADMIN_FORWARDING_TIMEOUT = 0.1
33+
QUERY_ADMIN_FORWARDING_TIMEOUT = 0.5
3434
SET_ADMIN_FORWARDING_TIMEOUT = 0.5
3535

3636
y_cable_platform_sfputil = None
@@ -379,17 +379,27 @@ def setup_grpc_channel_for_port(port, soc_ip):
379379
private_key=key,
380380
certificate_chain=cert_chain)
381381
"""
382-
helper_logger.log_debug("Y_CABLE_DEBUG:setting up gRPC channel for RPC's {} {}".format(port,soc_ip))
383-
channel = grpc.insecure_channel("{}:{}".format(soc_ip, GRPC_PORT), options=[('grpc.keepalive_timeout_ms', 1000)])
384-
stub = linkmgr_grpc_driver_pb2_grpc.DualToRActiveStub(channel)
382+
helper_logger.log_notice("Setting up gRPC channel for RPC's {} {}".format(port,soc_ip))
385383

386-
channel_ready = grpc.channel_ready_future(channel)
384+
retries = 3
385+
for _ in range(retries):
386+
channel = grpc.insecure_channel("{}:{}".format(soc_ip, GRPC_PORT), options=[('grpc.keepalive_timeout_ms', 2000),
387+
('grpc.keepalive_time_ms', 1000),
388+
('grpc.keepalive_permit_without_calls', True),
389+
('grpc.http2.max_pings_without_data', 0),
390+
('grpc.http2.min_time_between_pings_ms', 2000),
391+
('grpc.http2.min_ping_interval_without_data_ms', 1000)])
392+
stub = linkmgr_grpc_driver_pb2_grpc.DualToRActiveStub(channel)
387393

388-
try:
389-
channel_ready.result(timeout=0.2)
390-
except grpc.FutureTimeoutError:
391-
channel = None
392-
stub = None
394+
channel_ready = grpc.channel_ready_future(channel)
395+
396+
try:
397+
channel_ready.result(timeout=2)
398+
except grpc.FutureTimeoutError:
399+
channel = None
400+
stub = None
401+
else:
402+
break
393403

394404
if stub is None:
395405
helper_logger.log_warning("stub was not setup for gRPC soc ip {} port {}, no gRPC soc server running ?".format(soc_ip, port))
@@ -3176,11 +3186,22 @@ def handle_hw_mux_cable_table_grpc_notification(fvp, hw_mux_cable_tbl, asic_inde
31763186
retry_setup_grpc_channel_for_port(port, asic_index)
31773187
stub = grpc_port_stubs.get(port, None)
31783188
if stub is None:
3179-
helper_logger.log_notice(
3180-
"stub was None for performing hw mux RPC port {}, setting it up again did not work".format(port))
3189+
helper_logger.log_warning(
3190+
"gRPC channel was initially not setup for performing hw mux set state RPC port {}, trying to set gRPC channel again also did not work, posting unknown state for stateDB:HW_MUX_CABLE_TABLE".format(port))
3191+
active_side = new_state = 'unknown'
3192+
time_end = datetime.datetime.utcnow().strftime("%Y-%b-%d %H:%M:%S.%f")
3193+
fvs_metrics = swsscommon.FieldValuePairs([('xcvrd_switch_{}_{}_start'.format(toggle_side, new_state), str(time_start)),
3194+
('xcvrd_switch_{}_{}_end'.format(toggle_side, new_state), str(time_end))])
3195+
grpc_metrics_tbl[asic_index].set(port, fvs_metrics)
3196+
3197+
fvs_updated = swsscommon.FieldValuePairs([('state', new_state),
3198+
('read_side', read_side),
3199+
('active_side', str(active_side))])
3200+
hw_mux_cable_tbl[asic_index].set(port, fvs_updated)
31813201
return
31823202

31833203
ret, response = try_grpc(stub.SetAdminForwardingPortState, SET_ADMIN_FORWARDING_TIMEOUT, request)
3204+
31843205
if response is not None:
31853206
# Debug only, remove this section once Server side is Finalized
31863207
hw_response_port_ids = response.portid

0 commit comments

Comments
 (0)