8
8
import time
9
9
import click
10
10
import urllib
11
+ import syslog
11
12
import subprocess
12
13
from swsssdk import ConfigDBConnector
13
14
from swsssdk import SonicV2Connector
@@ -265,6 +266,37 @@ def abort_if_false(ctx, param, value):
265
266
if not value :
266
267
ctx .abort ()
267
268
269
def get_container_image_name(container_name):
    """Return the image repository name a container was created from.

    Runs ``docker inspect`` on *container_name* to read the image reference
    it is configured with (e.g. ``docker-lldp-sv2:latest``) and returns the
    text before the first ``:`` (e.g. ``docker-lldp-sv2``).

    Exits the process with the command's return code when ``docker inspect``
    fails.
    """
    # example image: docker-lldp-sv2:latest
    cmd = "docker inspect --format '{{.Config.Image}}' " + container_name
    # Text mode keeps the output a str, so the split below works on it directly.
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True,
                            universal_newlines=True)
    # stderr is not piped, so the second element is always None.
    (out, _) = proc.communicate()
    if proc.returncode != 0:
        sys.exit(proc.returncode)
    image_latest = out.rstrip()

    # example image_name: docker-lldp-sv2
    # Same result as `cut -d ':' -f 1`, without spawning a second shell or
    # interpolating docker's output into a shell command line.
    return image_latest.split(':')[0]
283
+
284
def get_container_image_id(image_tag):
    """Return the docker image ID for a tagged image reference.

    *image_tag* is an image reference including its tag, like
    'docker-teamd:latest'. The result is the output of
    ``docker images --format '{{.ID}}' <image_tag>`` with trailing whitespace
    stripped; it is empty when the command prints nothing on stdout.
    """
    # TODO: extract common docker info fetching functions
    cmd = "docker images --format '{{.ID}}' " + image_tag
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
    # communicate() drains the pipe and waits for the child, so the process is
    # reaped and its stdout closed rather than left for garbage collection.
    (out, _) = proc.communicate()
    return out.rstrip()
291
+
292
def get_container_image_id_all(image_name):
    """Return the set of docker image IDs listed under an image name.

    *image_name* is a repository name without tag, like 'docker-teamd'. Each
    output line of ``docker images --format '{{.ID}}' <image_name>`` becomes
    one element; duplicate IDs (several tags on the same image) collapse into
    a single entry. The set is empty when the command prints nothing.
    """
    cmd = "docker images --format '{{.ID}}' " + image_name
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
    # communicate() drains the pipe and waits for the child, so the process is
    # reaped and its stdout closed rather than left for garbage collection.
    (out, _) = proc.communicate()
    return set(out.splitlines())
268
300
269
301
# Main entrypoint
270
302
@click .group ()
@@ -433,27 +465,19 @@ def cleanup():
433
465
@cli .command ()
434
466
@click .option ('-y' , '--yes' , is_flag = True , callback = abort_if_false ,
435
467
expose_value = False , prompt = 'New docker image will be installed, continue?' )
436
- @click .option ('--cleanup_image' , is_flag = True , help = "Clean up old docker image(s) " )
437
- @click .option ('--enforce_check ' , is_flag = True , help = "Enforce pending task check for docker upgrade" )
468
+ @click .option ('--cleanup_image' , is_flag = True , help = "Clean up old docker image" )
469
+ @click .option ('--skip_check ' , is_flag = True , help = "Skip task check for docker upgrade" )
438
470
@click .option ('--tag' , type = str , help = "Tag for the new docker image" )
471
+ @click .option ('--warm' , is_flag = True , help = "Perform warm upgrade" )
439
472
@click .argument ('container_name' , metavar = '<container_name>' , required = True ,
440
- type = click .Choice (["swss" , "snmp" , "lldp" , "bgp" , "pmon" , "dhcp_relay" , "telemetry" , "teamd" ]))
473
+ type = click .Choice (["swss" , "snmp" , "lldp" , "bgp" , "pmon" , "dhcp_relay" , "telemetry" , "teamd" , "radv" , "amon" ]))
441
474
@click .argument ('url' )
442
- def upgrade_docker (container_name , url , cleanup_image , enforce_check , tag ):
475
+ def upgrade_docker (container_name , url , cleanup_image , skip_check , tag , warm ):
443
476
""" Upgrade docker image from local binary or URL"""
444
477
445
- # example image: docker-lldp-sv2:latest
446
- cmd = "docker inspect --format '{{.Config.Image}}' " + container_name
447
- proc = subprocess .Popen (cmd , stdout = subprocess .PIPE , shell = True )
448
- (out , err ) = proc .communicate ()
449
- if proc .returncode != 0 :
450
- sys .exit (proc .returncode )
451
- image_latest = out .rstrip ()
452
-
453
- # example image_name: docker-lldp-sv2
454
- cmd = "echo " + image_latest + " | cut -d ':' -f 1"
455
- proc = subprocess .Popen (cmd , stdout = subprocess .PIPE , shell = True )
456
- image_name = proc .stdout .read ().rstrip ()
478
+ image_name = get_container_image_name (container_name )
479
+ image_latest = image_name + ":latest"
480
+ image_id_previous = get_container_image_id (image_latest )
457
481
458
482
DEFAULT_IMAGE_PATH = os .path .join ("/tmp/" , image_name )
459
483
if url .startswith ('http://' ) or url .startswith ('https://' ):
@@ -474,87 +498,166 @@ def upgrade_docker(container_name, url, cleanup_image, enforce_check, tag):
474
498
click .echo ("Image file '{}' does not exist or is not a regular file. Aborting..." .format (image_path ))
475
499
raise click .Abort ()
476
500
477
- warm = False
501
+ warm_configured = False
478
502
# warm restart enable/disable config is put in stateDB, not persistent across cold reboot, not saved to config_DB.json file
479
503
state_db = SonicV2Connector (host = '127.0.0.1' )
480
504
state_db .connect (state_db .STATE_DB , False )
481
505
TABLE_NAME_SEPARATOR = '|'
482
506
prefix = 'WARM_RESTART_ENABLE_TABLE' + TABLE_NAME_SEPARATOR
483
507
_hash = '{}{}' .format (prefix , container_name )
484
508
if state_db .get (state_db .STATE_DB , _hash , "enable" ) == "true" :
485
- warm = True
509
+ warm_configured = True
486
510
state_db .close (state_db .STATE_DB )
487
511
512
+ if container_name == "swss" or container_name == "bgp" or container_name == "teamd" :
513
+ if warm_configured == False and warm :
514
+ run_command ("config warm_restart enable %s" % container_name )
515
+
516
+ # Fetch tag of current running image
517
+ tag_previous = get_docker_tag_name (image_latest )
518
+ # Load the new image beforehand to shorten disruption time
519
+ run_command ("docker load < %s" % image_path )
520
+ warm_app_names = []
488
521
# warm restart specific processing for swss, bgp and teamd dockers.
489
- if warm == True :
522
+ if warm_configured == True or warm :
490
523
# make sure orchagent is in clean state if swss is to be upgraded
491
524
if container_name == "swss" :
492
- skipPendingTaskCheck = " -s"
493
- if enforce_check :
494
- skipPendingTaskCheck = ""
495
-
496
- cmd = "docker exec -i swss orchagent_restart_check -w 1000 " + skipPendingTaskCheck
497
- for i in range (1 , 6 ):
498
- proc = subprocess .Popen (cmd , stdout = subprocess .PIPE , shell = True )
499
- (out , err ) = proc .communicate ()
500
- if proc .returncode != 0 :
501
- if enforce_check :
502
- click .echo ("Orchagent is not in clean state, RESTARTCHECK failed {}" .format (i ))
503
- if i == 5 :
504
- sys .exit (proc .returncode )
505
- else :
506
- click .echo ("Orchagent is not in clean state, upgrading it anyway" )
507
- break
525
+ skipPendingTaskCheck = ""
526
+ if skip_check :
527
+ skipPendingTaskCheck = " -s"
528
+
529
+ cmd = "docker exec -i swss orchagent_restart_check -w 2000 -r 5 " + skipPendingTaskCheck
530
+
531
+ proc = subprocess .Popen (cmd , stdout = subprocess .PIPE , shell = True )
532
+ (out , err ) = proc .communicate ()
533
+ if proc .returncode != 0 :
534
+ if not skip_check :
535
+ click .echo ("Orchagent is not in clean state, RESTARTCHECK failed" )
536
+ # Restore original config before exit
537
+ if warm_configured == False and warm :
538
+ run_command ("config warm_restart disable %s" % container_name )
539
+ # Clean the image loaded earlier
540
+ image_id_latest = get_container_image_id (image_latest )
541
+ run_command ("docker rmi -f %s" % image_id_latest )
542
+ # Re-point latest tag to previous tag
543
+ run_command ("docker tag %s:%s %s" % (image_name , tag_previous , image_latest ))
544
+
545
+ sys .exit (proc .returncode )
508
546
else :
509
- click .echo ("Orchagent is in clean state and frozen for warm upgrade" )
510
- break
511
- run_command ("sleep 1" )
547
+ click .echo ("Orchagent is not in clean state, upgrading it anyway" )
548
+ else :
549
+ click .echo ("Orchagent is in clean state and frozen for warm upgrade" )
550
+
551
+ warm_app_names = ["orchagent" , "neighsyncd" ]
512
552
513
553
elif container_name == "bgp" :
514
554
# Kill bgpd to restart the bgp graceful restart procedure
515
555
click .echo ("Stopping bgp ..." )
516
556
run_command ("docker exec -i bgp pkill -9 zebra" )
517
557
run_command ("docker exec -i bgp pkill -9 bgpd" )
518
- run_command ( "sleep 2" ) # wait 2 seconds for bgp to settle down
558
+ warm_app_names = [ " bgp" ]
519
559
click .echo ("Stopped bgp ..." )
520
560
521
561
elif container_name == "teamd" :
522
562
click .echo ("Stopping teamd ..." )
523
563
# Send USR1 signal to all teamd instances to stop them
524
564
# It will prepare teamd for warm-reboot
525
565
run_command ("docker exec -i teamd pkill -USR1 teamd > /dev/null" )
526
- run_command ( "sleep 2" ) # wait 2 seconds for teamd to settle down
566
+ warm_app_names = [ "teamsyncd" ]
527
567
click .echo ("Stopped teamd ..." )
528
568
529
- run_command ("systemctl stop %s" % container_name )
569
+ # clean app reconciliation state from last warm start if it exists
570
+ for warm_app_name in warm_app_names :
571
+ cmd = "docker exec -i database redis-cli -n 6 hdel 'WARM_RESTART_TABLE|" + warm_app_name + "' state"
572
+ run_command (cmd )
573
+
574
+ run_command ("docker kill %s > /dev/null" % container_name )
530
575
run_command ("docker rm %s " % container_name )
531
- run_command ("docker rmi %s " % image_latest )
532
- run_command ("docker load < %s" % image_path )
533
576
if tag == None :
534
577
# example image: docker-lldp-sv2:latest
535
578
tag = get_docker_tag_name (image_latest )
536
579
run_command ("docker tag %s:latest %s:%s" % (image_name , image_name , tag ))
537
580
run_command ("systemctl restart %s" % container_name )
538
581
539
- # Clean up old docker images
540
- if cleanup_image :
541
- # All images id under the image name
542
- cmd = "docker images --format '{{.ID}}' " + image_name
543
- proc = subprocess .Popen (cmd , stdout = subprocess .PIPE , shell = True )
544
- image_id_all = proc .stdout .read ()
545
- image_id_all = image_id_all .splitlines ()
546
- image_id_all = set (image_id_all )
547
-
548
- # this is image_id for image with "latest" tag
549
- cmd = "docker images --format '{{.ID}}' " + image_latest
550
- proc = subprocess .Popen (cmd , stdout = subprocess .PIPE , shell = True )
551
- image_id_latest = proc .stdout .read ().rstrip ()
552
-
553
- for id in image_id_all :
554
- if id != image_id_latest :
555
- run_command ("docker rmi -f %s" % id )
556
-
557
- run_command ("sleep 5" ) # wait 5 seconds for application to sync
582
+ # All images id under the image name
583
+ image_id_all = get_container_image_id_all (image_name )
584
+
585
+ # this is image_id for image with "latest" tag
586
+ image_id_latest = get_container_image_id (image_latest )
587
+
588
+ for id in image_id_all :
589
+ if id != image_id_latest :
590
+ # Unless requested, the previous docker image will be preserved
591
+ if not cleanup_image and id == image_id_previous :
592
+ continue
593
+ run_command ("docker rmi -f %s" % id )
594
+
595
+ exp_state = "reconciled"
596
+ state = ""
597
+ # post warm restart specific processing for swss, bgp and teamd dockers, wait for reconciliation state.
598
+ if warm_configured == True or warm :
599
+ count = 0
600
+ for warm_app_name in warm_app_names :
601
+ state = ""
602
+ cmd = "docker exec -i database redis-cli -n 6 hget 'WARM_RESTART_TABLE|" + warm_app_name + "' state"
603
+ # Wait up to 180 seconds for reconciled state
604
+ while state != exp_state and count < 90 :
605
+ sys .stdout .write ("\r {}: " .format (warm_app_name ))
606
+ sys .stdout .write ("[%-s" % ('=' * count ))
607
+ sys .stdout .flush ()
608
+ count += 1
609
+ time .sleep (2 )
610
+ proc = subprocess .Popen (cmd , stdout = subprocess .PIPE , shell = True )
611
+ state = proc .stdout .read ().rstrip ()
612
+ syslog .syslog ("%s reached %s state" % (warm_app_name , state ))
613
+ sys .stdout .write ("]\n \r " )
614
+ if state != exp_state :
615
+ click .echo ("%s failed to reach %s state" % (warm_app_name , exp_state ))
616
+ syslog .syslog (syslog .LOG_ERR , "%s failed to reach %s state" % (warm_app_name , exp_state ))
617
+ else :
618
+ exp_state = "" # this is cold upgrade
619
+
620
+ # Restore to previous cold restart setting
621
+ if warm_configured == False and warm :
622
+ if container_name == "swss" or container_name == "bgp" or container_name == "teamd" :
623
+ run_command ("config warm_restart disable %s" % container_name )
624
+
625
+ if state == exp_state :
626
+ click .echo ('Done' )
627
+ else :
628
+ click .echo ('Failed' )
629
+ sys .exit (1 )
630
+
631
# rollback docker image
@cli.command()
@click.option('-y', '--yes', is_flag=True, callback=abort_if_false,
              expose_value=False, prompt='Docker image will be rolled back, continue?')
@click.argument('container_name', metavar='<container_name>', required=True,
                type=click.Choice(["swss", "snmp", "lldp", "bgp", "pmon", "dhcp_relay", "telemetry", "teamd", "radv", "amon"]))
def rollback_docker(container_name):
    """ Rollback docker image to previous version"""
    # NOTE: the docstring above doubles as the click help text for this command.
    image_name = get_container_image_name(container_name)

    # Rollback only proceeds when exactly two distinct image IDs exist under
    # this image name: the one tagged "latest" and the one to return to.
    candidate_ids = get_container_image_id_all(image_name)
    id_count = len(candidate_ids)
    if id_count != 2:
        click.echo("Two images required, but there are '{}' images for '{}'. Aborting...".format(id_count, image_name))
        raise click.Abort()

    # ID that currently carries the "latest" tag, i.e. the image rolled back from
    current_id = get_container_image_id(image_name + ":latest")

    # Look up the tag of the image that is not the current "latest"
    version_tag = ""
    for candidate in candidate_ids:
        if candidate == current_id:
            continue
        version_tag = get_docker_tag_name(candidate)

    # make previous image as latest
    run_command("docker tag %s:%s %s:latest" % (image_name, version_tag, image_name))

    if container_name in ("swss", "bgp", "teamd"):
        # These containers are not restarted here; the operator is told to cold reboot
        click.echo("Cold reboot is required to restore system state after '{}' rollback !!".format(container_name))
    else:
        run_command("systemctl restart %s" % container_name)

    click.echo('Done')
559
662
560
663
if __name__ == '__main__' :
0 commit comments