@@ -33,10 +33,15 @@ import (
33
33
"golang.org/x/sync/errgroup"
34
34
"golang.org/x/time/rate"
35
35
"gopkg.in/yaml.v2"
36
+
36
37
apierrors "k8s.io/apimachinery/pkg/api/errors"
38
+
39
+ corev1 "k8s.io/api/core/v1"
40
+
37
41
"k8s.io/apimachinery/pkg/api/meta"
38
42
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
39
43
k8sruntime "k8s.io/apimachinery/pkg/runtime"
44
+ utilerror "k8s.io/apimachinery/pkg/util/errors"
40
45
"k8s.io/client-go/kubernetes"
41
46
"k8s.io/client-go/rest"
42
47
"k8s.io/klog/v2"
@@ -46,7 +51,10 @@ import (
46
51
"antrea.io/antrea/pkg/apis/crd/v1beta1"
47
52
systemv1beta1 "antrea.io/antrea/pkg/apis/system/v1beta1"
48
53
antrea "antrea.io/antrea/pkg/client/clientset/versioned"
54
+
49
55
systemclientset "antrea.io/antrea/pkg/client/clientset/versioned/typed/system/v1beta1"
56
+
57
+ "antrea.io/antrea/pkg/util/compress"
50
58
)
51
59
52
60
const (
@@ -584,6 +592,20 @@ func controllerRemoteRunE(cmd *cobra.Command, args []string) error {
584
592
return fmt .Errorf ("failed to create clientset: %w" , err )
585
593
}
586
594
595
+ if err := os .MkdirAll (option .dir , 0700 | os .ModeDir ); err != nil {
596
+ return fmt .Errorf ("error when creating output dir: %w" , err )
597
+ }
598
+
599
+ f , err := os .Create (filepath .Join (option .dir , "clusterinfo" ))
600
+ if err != nil {
601
+ return err
602
+ }
603
+ defer f .Close ()
604
+ err = getClusterInfo (f , k8sClientset )
605
+ if err != nil {
606
+ return err
607
+ }
608
+
587
609
var controllerClient systemclientset.SupportBundleInterface
588
610
var agentClients map [string ]systemclientset.SupportBundleInterface
589
611
@@ -628,29 +650,17 @@ func controllerRemoteRunE(cmd *cobra.Command, args []string) error {
628
650
return fmt .Errorf ("no matched Nodes found to collect agent bundles" )
629
651
}
630
652
631
- if err := os .MkdirAll (option .dir , 0700 | os .ModeDir ); err != nil {
632
- return fmt .Errorf ("error when creating output dir: %w" , err )
633
- }
634
653
amount := len (agentClients ) * 2
635
654
if controllerClient != nil {
636
655
amount += 2
637
656
}
638
657
bar := barTmpl .Start (amount )
639
658
defer bar .Finish ()
640
659
defer bar .Set ("prefix" , "Finish " )
641
- f , err := os .Create (filepath .Join (option .dir , "clusterinfo" ))
642
- if err != nil {
643
- return err
644
- }
645
- defer f .Close ()
646
- err = getClusterInfo (f , k8sClientset )
647
- if err != nil {
648
- return err
649
- }
650
660
651
661
results := requestAll (ctx , agentClients , controllerClient , bar )
652
662
results = downloadAll (ctx , agentClients , controllerClient , dir , bar , results )
653
- return processResults (results , dir )
663
+ return processResults (antreaClientset , k8sClientset , results , dir )
654
664
}
655
665
656
666
func genErrorMsg (resultMap map [string ]error ) string {
@@ -662,8 +672,9 @@ func genErrorMsg(resultMap map[string]error) string {
662
672
}
663
673
664
674
// processResults will output the failed nodes and their reasons if any. If no data was collected,
665
- // error is returned, otherwise will return nil.
666
- func processResults (resultMap map [string ]error , dir string ) error {
675
+ // error is returned, otherwise will return nil. For failed nodes and the controller, it will also try to get logs from the
676
+ // kubernetes api.
677
+ func processResults (antreaClientset antrea.Interface , k8sClient kubernetes.Interface , resultMap map [string ]error , dir string ) error {
667
678
resultStr := ""
668
679
var failedNodes []string
669
680
allFailed := true
@@ -679,7 +690,8 @@ func processResults(resultMap map[string]error, dir string) error {
679
690
}
680
691
}
681
692
682
- if resultMap ["" ] != nil {
693
+ controllerFail := resultMap ["" ] != nil
694
+ if controllerFail {
683
695
fmt .Println ("Controller Info Failed Reason: " + resultMap ["" ].Error ())
684
696
}
685
697
@@ -692,9 +704,149 @@ func processResults(resultMap map[string]error, dir string) error {
692
704
err = writeFailedNodes (dir , failedNodes )
693
705
}
694
706
707
+ // download logs from kubernetes api
708
+ if failedNodes != nil || controllerFail {
709
+ err := downloadPodInfoFromKubernetes (antreaClientset , k8sClient , failedNodes , controllerFail , dir )
710
+ if err != nil {
711
+ fmt .Println ("Failed to download logs from kubernetes api: " + err .Error ())
712
+ } else {
713
+ allFailed = false
714
+ }
715
+ }
716
+
695
717
if allFailed {
696
718
return fmt .Errorf ("no data was collected: %s" , genErrorMsg (resultMap ))
697
719
} else {
698
720
return err
699
721
}
700
722
}
723
+
724
+ // downloadLogsFromKubernetes will try to download pod logs from kubernetes api for failed nodes and controller, as well as controllerinfo/agentinfo...
725
+ func downloadPodInfoFromKubernetes (antreaClientset antrea.Interface , k8sClient kubernetes.Interface , failedNodes []string , isControllerFail bool , dir string ) error {
726
+ agentInfoList , err := antreaClientset .CrdV1beta1 ().AntreaAgentInfos ().List (context .TODO (), metav1.ListOptions {ResourceVersion : "0" })
727
+ if err != nil {
728
+ return err
729
+ }
730
+
731
+ agentInfoMap := map [string ]v1beta1.AntreaAgentInfo {}
732
+ for _ , agentInfo := range agentInfoList .Items {
733
+ agentInfoMap [agentInfo .Name ] = agentInfo
734
+ }
735
+
736
+ pods , err := k8sClient .CoreV1 ().Pods ("kube-system" ).List (context .TODO (), metav1.ListOptions {
737
+ ResourceVersion : "0" ,
738
+ LabelSelector : "app=antrea" ,
739
+ })
740
+ if err != nil {
741
+ return err
742
+ }
743
+
744
+ failedNodesMap := make (map [string ]struct {})
745
+ for _ , node := range failedNodes {
746
+ failedNodesMap [node ] = struct {}{}
747
+ }
748
+
749
+ var errors []error
750
+
751
+ for _ , pod := range pods .Items {
752
+
753
+ tmpDir , err := afero .TempDir (defaultFS , "" , "bundle_tmp_" )
754
+ if err != nil {
755
+ errors = append (errors , err )
756
+ continue
757
+ }
758
+ defer defaultFS .RemoveAll (tmpDir )
759
+
760
+ if pod .Labels ["component" ] == "antrea-controller" && isControllerFail {
761
+ controllerInfo , err := antreaClientset .CrdV1beta1 ().AntreaControllerInfos ().Get (context .TODO (), "antrea-controller" , metav1.GetOptions {})
762
+ if err != nil {
763
+ errors = append (errors , err )
764
+ continue
765
+ }
766
+ data , err := yaml .Marshal (controllerInfo )
767
+ if err != nil {
768
+ errors = append (errors , err )
769
+ continue
770
+ }
771
+ err = afero .WriteFile (defaultFS , filepath .Join (tmpDir , "controllerinfo" ), data , 0644 )
772
+ errors = append (errors , err )
773
+
774
+ err = downloadPodLogs (k8sClient , "controller" , pod .Namespace , pod .Name , []string {"antrea-controller" }, dir , tmpDir )
775
+ errors = append (errors , err )
776
+
777
+ }
778
+
779
+ if _ , exist := failedNodesMap [pod .Spec .NodeName ]; ! exist {
780
+ continue
781
+ }
782
+
783
+ if pod .Labels ["component" ] == "antrea-agent" {
784
+ if agentInfo , ok := agentInfoMap [pod .Spec .NodeName ]; ok {
785
+ data , err := yaml .Marshal (agentInfo )
786
+ if err != nil {
787
+ errors = append (errors , err )
788
+ continue
789
+ }
790
+ err = afero .WriteFile (defaultFS , filepath .Join (tmpDir , "agentinfo" ), data , 0644 )
791
+ errors = append (errors , err )
792
+ }
793
+
794
+ err = downloadPodLogs (k8sClient , "agent_" + pod .Spec .NodeName , pod .Namespace , pod .Name , []string {"antrea-agent" , "antrea-ovs" , "install-cni" }, dir , tmpDir )
795
+ errors = append (errors , err )
796
+
797
+ }
798
+ }
799
+ return utilerror .NewAggregate (errors )
800
+ }
801
+
802
+ func downloadPodLogs (k8sClient kubernetes.Interface , comp string , namespace string , podName string , containers []string , dir string , tmpDir string ) error {
803
+ var errors []error
804
+
805
+ for _ , containerName := range containers {
806
+ containerDirName := containerName
807
+ if strings .HasPrefix (containerName , "antrea-" ) {
808
+ containerDirName = strings .ReplaceAll (containerName , "antrea-" , "" )
809
+ }
810
+
811
+ podLogDir := filepath .Join (tmpDir , "logs" , containerDirName )
812
+ err := os .MkdirAll (podLogDir , 0755 )
813
+ if err != nil {
814
+ return err
815
+ }
816
+
817
+ fileName := filepath .Join (podLogDir , containerName + ".log" )
818
+ f , err := os .Create (fileName )
819
+ if err != nil {
820
+ errors = append (errors , err )
821
+ continue
822
+ }
823
+
824
+ logOption := & corev1.PodLogOptions {
825
+ Container : containerName ,
826
+ }
827
+ logs := k8sClient .CoreV1 ().Pods (namespace ).GetLogs (podName , logOption )
828
+ logStream , err := logs .Stream (context .TODO ())
829
+ if err != nil {
830
+ errors = append (errors , err )
831
+ continue
832
+ }
833
+
834
+ _ , err = io .Copy (f , logStream )
835
+ errors = append (errors , err )
836
+ err = logStream .Close ()
837
+ errors = append (errors , err )
838
+ }
839
+
840
+ gzFileName := filepath .Join (dir , comp + ".tar.gz" )
841
+
842
+ f , err := os .Create (gzFileName )
843
+ if err != nil {
844
+ errors = append (errors , err )
845
+ } else {
846
+ _ , err := compress .PackDir (defaultFS , tmpDir , f )
847
+ errors = append (errors , err )
848
+ }
849
+
850
+ return utilerror .NewAggregate (errors )
851
+
852
+ }
0 commit comments