Skip to content

Commit a51535a

Browse files
authored
Support collecting logs from K8s API as fallback for supportbundle (#3659)
When the normal supportbundle api fails for some nodes or the controller, use the kubernetes api instead to collect Pods' logs. Also, in both cases, clusterinfo will always be collected first. Signed-off-by: Hang Yan <[email protected]>
1 parent 10566ca commit a51535a

File tree

4 files changed

+387
-36
lines changed

4 files changed

+387
-36
lines changed

pkg/antctl/raw/supportbundle/command.go

+180-16
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,14 @@ import (
3333
"golang.org/x/sync/errgroup"
3434
"golang.org/x/time/rate"
3535
"gopkg.in/yaml.v2"
36+
37+
corev1 "k8s.io/api/core/v1"
3638
apierrors "k8s.io/apimachinery/pkg/api/errors"
3739
"k8s.io/apimachinery/pkg/api/meta"
3840
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3941
k8sruntime "k8s.io/apimachinery/pkg/runtime"
42+
utilerror "k8s.io/apimachinery/pkg/util/errors"
43+
"k8s.io/apimachinery/pkg/util/sets"
4044
"k8s.io/client-go/kubernetes"
4145
"k8s.io/client-go/rest"
4246
"k8s.io/klog/v2"
@@ -47,6 +51,8 @@ import (
4751
systemv1beta1 "antrea.io/antrea/pkg/apis/system/v1beta1"
4852
antrea "antrea.io/antrea/pkg/client/clientset/versioned"
4953
systemclientset "antrea.io/antrea/pkg/client/clientset/versioned/typed/system/v1beta1"
54+
"antrea.io/antrea/pkg/util/compress"
55+
"antrea.io/antrea/pkg/util/k8s"
5056
)
5157

5258
const (
@@ -581,6 +587,20 @@ func controllerRemoteRunE(cmd *cobra.Command, args []string) error {
581587
return fmt.Errorf("failed to create clientset: %w", err)
582588
}
583589

590+
if err := os.MkdirAll(option.dir, 0700); err != nil {
591+
return fmt.Errorf("error when creating output dir: %w", err)
592+
}
593+
594+
f, err := os.Create(filepath.Join(option.dir, "clusterinfo"))
595+
if err != nil {
596+
return err
597+
}
598+
defer f.Close()
599+
err = getClusterInfo(f, k8sClientset)
600+
if err != nil {
601+
return err
602+
}
603+
584604
var controllerClient systemclientset.SupportBundleInterface
585605
var agentClients map[string]systemclientset.SupportBundleInterface
586606

@@ -625,29 +645,17 @@ func controllerRemoteRunE(cmd *cobra.Command, args []string) error {
625645
return fmt.Errorf("no matched Nodes found to collect agent bundles")
626646
}
627647

628-
if err := os.MkdirAll(option.dir, 0700|os.ModeDir); err != nil {
629-
return fmt.Errorf("error when creating output dir: %w", err)
630-
}
631648
amount := len(agentClients) * 2
632649
if controllerClient != nil {
633650
amount += 2
634651
}
635652
bar := barTmpl.Start(amount)
636653
defer bar.Finish()
637654
defer bar.Set("prefix", "Finish ")
638-
f, err := os.Create(filepath.Join(option.dir, "clusterinfo"))
639-
if err != nil {
640-
return err
641-
}
642-
defer f.Close()
643-
err = getClusterInfo(f, k8sClientset)
644-
if err != nil {
645-
return err
646-
}
647655

648656
results := requestAll(ctx, agentClients, controllerClient, bar)
649657
results = downloadAll(ctx, agentClients, controllerClient, dir, bar, results)
650-
return processResults(results, dir)
658+
return processResults(ctx, antreaClientset, k8sClientset, results, dir)
651659
}
652660

653661
func genErrorMsg(resultMap map[string]error) string {
@@ -659,8 +667,9 @@ func genErrorMsg(resultMap map[string]error) string {
659667
}
660668

661669
// processResults will output the failed nodes and their reasons if any. If no data was collected,
662-
// error is returned, otherwise will return nil.
663-
func processResults(resultMap map[string]error, dir string) error {
670+
// error is returned, otherwise will return nil. For failed nodes and controller, will also try to get logs from
671+
// kubernetes api.
672+
func processResults(ctx context.Context, antreaClientset antrea.Interface, k8sClient kubernetes.Interface, resultMap map[string]error, dir string) error {
664673
resultStr := ""
665674
var failedNodes []string
666675
allFailed := true
@@ -676,7 +685,8 @@ func processResults(resultMap map[string]error, dir string) error {
676685
}
677686
}
678687

679-
if resultMap[""] != nil {
688+
controllerFailed := resultMap[""] != nil
689+
if controllerFailed {
680690
fmt.Println("Controller Info Failed Reason: " + resultMap[""].Error())
681691
}
682692

@@ -689,9 +699,163 @@ func processResults(resultMap map[string]error, dir string) error {
689699
err = writeFailedNodes(dir, failedNodes)
690700
}
691701

702+
// download logs from kubernetes api
703+
if failedNodes != nil {
704+
if err = downloadFallbackAgentBundleFromKubernetes(ctx, antreaClientset, k8sClient, failedNodes, dir); err != nil {
705+
fmt.Println("Failed to download agent bundle from kubernetes api: " + err.Error())
706+
} else {
707+
allFailed = false
708+
}
709+
}
710+
if controllerFailed {
711+
if err = downloadFallbackControllerBundleFromKubernetes(ctx, antreaClientset, k8sClient, dir); err != nil {
712+
fmt.Println("Failed to download controller bundle from kubernetes api: " + err.Error())
713+
} else {
714+
allFailed = false
715+
}
716+
}
717+
692718
if allFailed {
693719
return fmt.Errorf("no data was collected: %s", genErrorMsg(resultMap))
694720
} else {
695721
return err
696722
}
697723
}
724+
725+
func downloadFallbackControllerBundleFromKubernetes(ctx context.Context, antreaClientset antrea.Interface, k8sClient kubernetes.Interface, dir string) error {
726+
tmpDir, err := afero.TempDir(defaultFS, "", "bundle_tmp_")
727+
if err != nil {
728+
return err
729+
}
730+
defer defaultFS.RemoveAll(tmpDir)
731+
732+
var podRef *corev1.ObjectReference
733+
if err := func() error {
734+
controllerInfo, err := antreaClientset.CrdV1beta1().AntreaControllerInfos().Get(ctx, v1beta1.AntreaControllerInfoResourceName, metav1.GetOptions{})
735+
if err != nil {
736+
return err
737+
}
738+
podRef = &controllerInfo.PodRef
739+
data, err := yaml.Marshal(controllerInfo)
740+
if err != nil {
741+
return err
742+
}
743+
if err := afero.WriteFile(defaultFS, filepath.Join(dir, "controllerinfo"), data, 0644); err != nil {
744+
return err
745+
}
746+
return nil
747+
}(); err != nil {
748+
return err
749+
}
750+
if podRef == nil {
751+
return fmt.Errorf("no podRef found in AntreaControllerInfo")
752+
}
753+
pod, err := k8sClient.CoreV1().Pods(podRef.Namespace).Get(ctx, podRef.Name, metav1.GetOptions{})
754+
if err != nil {
755+
return err
756+
}
757+
if err := downloadPodLogs(ctx, k8sClient, pod.Namespace, pod.Name, k8s.GetPodContainerNames(pod), tmpDir); err != nil {
758+
return err
759+
}
760+
return packPodBundle(pod, dir, tmpDir)
761+
}
762+
763+
func downloadFallbackAgentBundleFromKubernetes(ctx context.Context, antreaClientset antrea.Interface, k8sClient kubernetes.Interface, failedNodes []string, dir string) error {
764+
agentInfoList, err := antreaClientset.CrdV1beta1().AntreaAgentInfos().List(ctx, metav1.ListOptions{ResourceVersion: "0"})
765+
if err != nil {
766+
return err
767+
}
768+
769+
agentInfoMap := map[string]v1beta1.AntreaAgentInfo{}
770+
for _, agentInfo := range agentInfoList.Items {
771+
agentInfoMap[agentInfo.Name] = agentInfo
772+
}
773+
pods, err := k8sClient.CoreV1().Pods("kube-system").List(ctx, metav1.ListOptions{
774+
ResourceVersion: "0",
775+
LabelSelector: "app=antrea,component=antrea-agent",
776+
})
777+
if err != nil {
778+
return err
779+
}
780+
failedNodeSet := sets.NewString(failedNodes...)
781+
var errors []error
782+
for _, pod := range pods.Items {
783+
if !failedNodeSet.Has(pod.Spec.NodeName) {
784+
continue
785+
}
786+
if err := func() error {
787+
tmpDir, err := afero.TempDir(defaultFS, "", "bundle_tmp_")
788+
if err != nil {
789+
return err
790+
}
791+
defer defaultFS.RemoveAll(tmpDir)
792+
if agentInfo, ok := agentInfoMap[pod.Spec.NodeName]; ok {
793+
data, err := yaml.Marshal(agentInfo)
794+
if err != nil {
795+
return err
796+
}
797+
if err = afero.WriteFile(defaultFS, filepath.Join(tmpDir, "agentinfo"), data, 0644); err != nil {
798+
return err
799+
}
800+
}
801+
err = downloadPodLogs(ctx, k8sClient, pod.Namespace, pod.Name, k8s.GetPodContainerNames(&pod), tmpDir)
802+
if err != nil {
803+
return err
804+
}
805+
return packPodBundle(&pod, dir, tmpDir)
806+
}(); err != nil {
807+
errors = append(errors, err)
808+
}
809+
}
810+
return utilerror.NewAggregate(errors)
811+
}
812+
813+
func packPodBundle(pod *corev1.Pod, dir string, bundleDir string) error {
814+
prefix := "agent_"
815+
if strings.Contains(pod.Name, "controller") {
816+
prefix = "controller_"
817+
}
818+
gzFileName := filepath.Join(dir, prefix+pod.Spec.NodeName+".tar.gz")
819+
f, err := defaultFS.Create(gzFileName)
820+
if err != nil {
821+
return err
822+
}
823+
defer f.Close()
824+
_, err = compress.PackDir(defaultFS, bundleDir, f)
825+
return err
826+
}
827+
828+
func downloadPodLogs(ctx context.Context, k8sClient kubernetes.Interface, namespace string, podName string, containers []string, dir string) error {
829+
downloadContainerLogs := func(containerName string) error {
830+
containerDirName, _ := strings.CutPrefix(containerName, "antrea-")
831+
containerLogDir := filepath.Join(dir, "logs", containerDirName)
832+
err := os.MkdirAll(containerLogDir, 0755)
833+
if err != nil {
834+
return err
835+
}
836+
fileName := filepath.Join(containerLogDir, containerName+".log")
837+
f, err := defaultFS.Create(fileName)
838+
if err != nil {
839+
return err
840+
}
841+
defer f.Close()
842+
logOption := &corev1.PodLogOptions{
843+
Container: containerName,
844+
}
845+
logs := k8sClient.CoreV1().Pods(namespace).GetLogs(podName, logOption)
846+
logStream, err := logs.Stream(ctx)
847+
if err != nil {
848+
return err
849+
}
850+
851+
if _, err = io.Copy(f, logStream); err != nil {
852+
return err
853+
}
854+
return logStream.Close()
855+
}
856+
var errors []error
857+
for _, containerName := range containers {
858+
errors = append(errors, downloadContainerLogs(containerName))
859+
}
860+
return utilerror.NewAggregate(errors)
861+
}

0 commit comments

Comments
 (0)