Skip to content

Commit 40c9d09

Browse files
committed
Support collect logs for failed agents and controller for supportbundle
Signed-off-by: Hang Yan <[email protected]>
1 parent b6d4238 commit 40c9d09

File tree

1 file changed

+168
-16
lines changed

1 file changed

+168
-16
lines changed

pkg/antctl/raw/supportbundle/command.go

+168-16
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,15 @@ import (
3333
"golang.org/x/sync/errgroup"
3434
"golang.org/x/time/rate"
3535
"gopkg.in/yaml.v2"
36+
3637
apierrors "k8s.io/apimachinery/pkg/api/errors"
38+
39+
corev1 "k8s.io/api/core/v1"
40+
3741
"k8s.io/apimachinery/pkg/api/meta"
3842
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3943
k8sruntime "k8s.io/apimachinery/pkg/runtime"
44+
utilerror "k8s.io/apimachinery/pkg/util/errors"
4045
"k8s.io/client-go/kubernetes"
4146
"k8s.io/client-go/rest"
4247
"k8s.io/klog/v2"
@@ -46,7 +51,10 @@ import (
4651
"antrea.io/antrea/pkg/apis/crd/v1beta1"
4752
systemv1beta1 "antrea.io/antrea/pkg/apis/system/v1beta1"
4853
antrea "antrea.io/antrea/pkg/client/clientset/versioned"
54+
4955
systemclientset "antrea.io/antrea/pkg/client/clientset/versioned/typed/system/v1beta1"
56+
57+
"antrea.io/antrea/pkg/util/compress"
5058
)
5159

5260
const (
@@ -584,6 +592,20 @@ func controllerRemoteRunE(cmd *cobra.Command, args []string) error {
584592
return fmt.Errorf("failed to create clientset: %w", err)
585593
}
586594

595+
if err := os.MkdirAll(option.dir, 0700|os.ModeDir); err != nil {
596+
return fmt.Errorf("error when creating output dir: %w", err)
597+
}
598+
599+
f, err := os.Create(filepath.Join(option.dir, "clusterinfo"))
600+
if err != nil {
601+
return err
602+
}
603+
defer f.Close()
604+
err = getClusterInfo(f, k8sClientset)
605+
if err != nil {
606+
return err
607+
}
608+
587609
var controllerClient systemclientset.SupportBundleInterface
588610
var agentClients map[string]systemclientset.SupportBundleInterface
589611

@@ -628,29 +650,17 @@ func controllerRemoteRunE(cmd *cobra.Command, args []string) error {
628650
return fmt.Errorf("no matched Nodes found to collect agent bundles")
629651
}
630652

631-
if err := os.MkdirAll(option.dir, 0700|os.ModeDir); err != nil {
632-
return fmt.Errorf("error when creating output dir: %w", err)
633-
}
634653
amount := len(agentClients) * 2
635654
if controllerClient != nil {
636655
amount += 2
637656
}
638657
bar := barTmpl.Start(amount)
639658
defer bar.Finish()
640659
defer bar.Set("prefix", "Finish ")
641-
f, err := os.Create(filepath.Join(option.dir, "clusterinfo"))
642-
if err != nil {
643-
return err
644-
}
645-
defer f.Close()
646-
err = getClusterInfo(f, k8sClientset)
647-
if err != nil {
648-
return err
649-
}
650660

651661
results := requestAll(ctx, agentClients, controllerClient, bar)
652662
results = downloadAll(ctx, agentClients, controllerClient, dir, bar, results)
653-
return processResults(results, dir)
663+
return processResults(antreaClientset, k8sClientset, results, dir)
654664
}
655665

656666
func genErrorMsg(resultMap map[string]error) string {
@@ -662,8 +672,9 @@ func genErrorMsg(resultMap map[string]error) string {
662672
}
663673

664674
// processResults will output the failed nodes and their reasons if any. If no data was collected,
665-
// error is returned, otherwise will return nil.
666-
func processResults(resultMap map[string]error, dir string) error {
675+
// error is returned, otherwise will return nil. For failed nodes and controller, it will also try to get logs from the
676+
// Kubernetes API.
677+
func processResults(antreaClientset antrea.Interface, k8sClient kubernetes.Interface, resultMap map[string]error, dir string) error {
667678
resultStr := ""
668679
var failedNodes []string
669680
allFailed := true
@@ -679,7 +690,8 @@ func processResults(resultMap map[string]error, dir string) error {
679690
}
680691
}
681692

682-
if resultMap[""] != nil {
693+
controllerFail := resultMap[""] != nil
694+
if controllerFail {
683695
fmt.Println("Controller Info Failed Reason: " + resultMap[""].Error())
684696
}
685697

@@ -692,9 +704,149 @@ func processResults(resultMap map[string]error, dir string) error {
692704
err = writeFailedNodes(dir, failedNodes)
693705
}
694706

707+
// download logs from kubernetes api
708+
if failedNodes != nil || controllerFail {
709+
err := downloadPodInfoFromKubernetes(antreaClientset, k8sClient, failedNodes, controllerFail, dir)
710+
if err != nil {
711+
fmt.Println("Failed to download logs from kubernetes api: " + err.Error())
712+
} else {
713+
allFailed = false
714+
}
715+
}
716+
695717
if allFailed {
696718
return fmt.Errorf("no data was collected: %s", genErrorMsg(resultMap))
697719
} else {
698720
return err
699721
}
700722
}
723+
724+
// downloadLogsFromKubernetes will try to download pod logs from kubernetes api for failed nodes and controller, as well as controllerinfo/agentinfo...
725+
func downloadPodInfoFromKubernetes(antreaClientset antrea.Interface, k8sClient kubernetes.Interface, failedNodes []string, isControllerFail bool, dir string) error {
726+
agentInfoList, err := antreaClientset.CrdV1beta1().AntreaAgentInfos().List(context.TODO(), metav1.ListOptions{ResourceVersion: "0"})
727+
if err != nil {
728+
return err
729+
}
730+
731+
agentInfoMap := map[string]v1beta1.AntreaAgentInfo{}
732+
for _, agentInfo := range agentInfoList.Items {
733+
agentInfoMap[agentInfo.Name] = agentInfo
734+
}
735+
736+
pods, err := k8sClient.CoreV1().Pods("kube-system").List(context.TODO(), metav1.ListOptions{
737+
ResourceVersion: "0",
738+
LabelSelector: "app=antrea",
739+
})
740+
if err != nil {
741+
return err
742+
}
743+
744+
failedNodesMap := make(map[string]struct{})
745+
for _, node := range failedNodes {
746+
failedNodesMap[node] = struct{}{}
747+
}
748+
749+
var errors []error
750+
751+
for _, pod := range pods.Items {
752+
753+
tmpDir, err := afero.TempDir(defaultFS, "", "bundle_tmp_")
754+
if err != nil {
755+
errors = append(errors, err)
756+
continue
757+
}
758+
defer defaultFS.RemoveAll(tmpDir)
759+
760+
if pod.Labels["component"] == "antrea-controller" && isControllerFail {
761+
controllerInfo, err := antreaClientset.CrdV1beta1().AntreaControllerInfos().Get(context.TODO(), "antrea-controller", metav1.GetOptions{})
762+
if err != nil {
763+
errors = append(errors, err)
764+
continue
765+
}
766+
data, err := yaml.Marshal(controllerInfo)
767+
if err != nil {
768+
errors = append(errors, err)
769+
continue
770+
}
771+
err = afero.WriteFile(defaultFS, filepath.Join(tmpDir, "controllerinfo"), data, 0644)
772+
errors = append(errors, err)
773+
774+
err = downloadPodLogs(k8sClient, "controller", pod.Namespace, pod.Name, []string{"antrea-controller"}, dir, tmpDir)
775+
errors = append(errors, err)
776+
777+
}
778+
779+
if _, exist := failedNodesMap[pod.Spec.NodeName]; !exist {
780+
continue
781+
}
782+
783+
if pod.Labels["component"] == "antrea-agent" {
784+
if agentInfo, ok := agentInfoMap[pod.Spec.NodeName]; ok {
785+
data, err := yaml.Marshal(agentInfo)
786+
if err != nil {
787+
errors = append(errors, err)
788+
continue
789+
}
790+
err = afero.WriteFile(defaultFS, filepath.Join(tmpDir, "agentinfo"), data, 0644)
791+
errors = append(errors, err)
792+
}
793+
794+
err = downloadPodLogs(k8sClient, "agent_"+pod.Spec.NodeName, pod.Namespace, pod.Name, []string{"antrea-agent", "antrea-ovs", "install-cni"}, dir, tmpDir)
795+
errors = append(errors, err)
796+
797+
}
798+
}
799+
return utilerror.NewAggregate(errors)
800+
}
801+
802+
func downloadPodLogs(k8sClient kubernetes.Interface, comp string, namespace string, podName string, containers []string, dir string, tmpDir string) error {
803+
var errors []error
804+
805+
for _, containerName := range containers {
806+
containerDirName := containerName
807+
if strings.HasPrefix(containerName, "antrea-") {
808+
containerDirName = strings.ReplaceAll(containerName, "antrea-", "")
809+
}
810+
811+
podLogDir := filepath.Join(tmpDir, "logs", containerDirName)
812+
err := os.MkdirAll(podLogDir, 0755)
813+
if err != nil {
814+
return err
815+
}
816+
817+
fileName := filepath.Join(podLogDir, containerName+".log")
818+
f, err := os.Create(fileName)
819+
if err != nil {
820+
errors = append(errors, err)
821+
continue
822+
}
823+
824+
logOption := &corev1.PodLogOptions{
825+
Container: containerName,
826+
}
827+
logs := k8sClient.CoreV1().Pods(namespace).GetLogs(podName, logOption)
828+
logStream, err := logs.Stream(context.TODO())
829+
if err != nil {
830+
errors = append(errors, err)
831+
continue
832+
}
833+
834+
_, err = io.Copy(f, logStream)
835+
errors = append(errors, err)
836+
err = logStream.Close()
837+
errors = append(errors, err)
838+
}
839+
840+
gzFileName := filepath.Join(dir, comp+".tar.gz")
841+
842+
f, err := os.Create(gzFileName)
843+
if err != nil {
844+
errors = append(errors, err)
845+
} else {
846+
_, err := compress.PackDir(defaultFS, tmpDir, f)
847+
errors = append(errors, err)
848+
}
849+
850+
return utilerror.NewAggregate(errors)
851+
852+
}

0 commit comments

Comments
 (0)