Skip to content

Commit 26c7a5d

Browse files
committed
Support collect logs for failed agents and controller for supportbundle
When the normal supportbundle api failed for some nodes or controller, use kubernetes api instead to collect logs. Also, in either case, clusterinfo will always be gathered first. Signed-off-by: Hang Yan <[email protected]>
1 parent 42162ce commit 26c7a5d

File tree

2 files changed

+275
-16
lines changed

2 files changed

+275
-16
lines changed

pkg/antctl/raw/supportbundle/command.go

+206-16
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"encoding/json"
2121
"fmt"
2222
"io"
23+
2324
"net"
2425
"os"
2526
"path"
@@ -29,21 +30,27 @@ import (
2930
"time"
3031

3132
"github.com/cheggaaa/pb/v3"
33+
"github.com/spf13/afero"
3234
"github.com/spf13/cobra"
3335
"golang.org/x/sync/errgroup"
3436
"golang.org/x/time/rate"
3537
"gopkg.in/yaml.v2"
38+
corev1 "k8s.io/api/core/v1"
3639
"k8s.io/apimachinery/pkg/api/meta"
3740
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3841
k8sruntime "k8s.io/apimachinery/pkg/runtime"
42+
utilerror "k8s.io/apimachinery/pkg/util/errors"
43+
"k8s.io/apimachinery/pkg/util/sets"
3944
"k8s.io/client-go/kubernetes"
4045
"k8s.io/client-go/rest"
4146
"k8s.io/klog/v2"
4247

4348
"antrea.io/antrea/pkg/antctl/raw"
4449
"antrea.io/antrea/pkg/antctl/runtime"
50+
"antrea.io/antrea/pkg/apis/crd/v1beta1"
4551
systemv1beta1 "antrea.io/antrea/pkg/apis/system/v1beta1"
4652
antrea "antrea.io/antrea/pkg/client/clientset/versioned"
53+
"antrea.io/antrea/pkg/util/compress"
4754
"antrea.io/antrea/pkg/util/ip"
4855
"antrea.io/antrea/pkg/util/k8s"
4956
)
@@ -59,6 +66,10 @@ const (
5966
// Command is the support bundle command implementation.
6067
var Command *cobra.Command
6168

69+
var (
70+
// defaultFS is the filesystem used for the temporary bundle directories and
// the info files written into them; it is backed by the real OS filesystem.
defaultFS = afero.NewOsFs()
71+
)
72+
6273
var option = &struct {
6374
dir string
6475
labelSelector string
@@ -533,6 +544,20 @@ func controllerRemoteRunE(cmd *cobra.Command, args []string) error {
533544
return fmt.Errorf("failed to create clientset: %w", err)
534545
}
535546

547+
if err := os.MkdirAll(option.dir, 0700|os.ModeDir); err != nil {
548+
return fmt.Errorf("error when creating output dir: %w", err)
549+
}
550+
551+
f, err := os.Create(filepath.Join(option.dir, "clusterinfo"))
552+
if err != nil {
553+
return err
554+
}
555+
defer f.Close()
556+
err = getClusterInfo(f, k8sClientset)
557+
if err != nil {
558+
return err
559+
}
560+
536561
var controllerClient *rest.RESTClient
537562
var agentClients map[string]*rest.RESTClient
538563

@@ -577,29 +602,17 @@ func controllerRemoteRunE(cmd *cobra.Command, args []string) error {
577602
return fmt.Errorf("no matched Nodes found to collect agent bundles")
578603
}
579604

580-
if err := os.MkdirAll(option.dir, 0700|os.ModeDir); err != nil {
581-
return fmt.Errorf("error when creating output dir: %w", err)
582-
}
583605
amount := len(agentClients) * 2
584606
if controllerClient != nil {
585607
amount += 2
586608
}
587609
bar := barTmpl.Start(amount)
588610
defer bar.Finish()
589611
defer bar.Set("prefix", "Finish ")
590-
f, err := os.Create(filepath.Join(option.dir, "clusterinfo"))
591-
if err != nil {
592-
return err
593-
}
594-
defer f.Close()
595-
err = getClusterInfo(f, k8sClientset)
596-
if err != nil {
597-
return err
598-
}
599612

600613
results := requestAll(agentClients, controllerClient, bar)
601614
results = downloadAll(agentClients, controllerClient, dir, bar, results)
602-
return processResults(results, dir)
615+
return processResults(antreaClientset, k8sClientset, results, dir)
603616
}
604617

605618
func genErrorMsg(resultMap map[string]error) string {
@@ -611,8 +624,9 @@ func genErrorMsg(resultMap map[string]error) string {
611624
}
612625

613626
// processResults will output the failed nodes and their reasons if any. If no data was collected,
614-
// error is returned, otherwise will return nil.
615-
func processResults(resultMap map[string]error, dir string) error {
627+
// error is returned, otherwise will return nil. For failed nodes and controller, will also try to get logs from
628+
// kubernetes api.
629+
func processResults(antreaClientset antrea.Interface, k8sClient kubernetes.Interface, resultMap map[string]error, dir string) error {
616630
resultStr := ""
617631
var failedNodes []string
618632
allFailed := true
@@ -628,7 +642,8 @@ func processResults(resultMap map[string]error, dir string) error {
628642
}
629643
}
630644

631-
if resultMap[""] != nil {
645+
controllerFail := resultMap[""] != nil
646+
if controllerFail {
632647
fmt.Println("Controller Info Failed Reason: " + resultMap[""].Error())
633648
}
634649

@@ -641,9 +656,184 @@ func processResults(resultMap map[string]error, dir string) error {
641656
err = writeFailedNodes(dir, failedNodes)
642657
}
643658

659+
if controllerFail {
660+
err := downloadControllerBundleFromKubernetes(antreaClientset, k8sClient, dir)
661+
if err != nil {
662+
fmt.Println("Failed to download controller logs from kubernetes api: " + err.Error())
663+
} else {
664+
allFailed = false
665+
}
666+
}
667+
668+
// download logs from kubernetes api
669+
if failedNodes != nil {
670+
err := downloadAgentBundleFromKubernetes(antreaClientset, k8sClient, failedNodes, dir)
671+
if err != nil {
672+
fmt.Println("Failed to download agent logs from kubernetes api: " + err.Error())
673+
} else {
674+
allFailed = false
675+
}
676+
}
677+
644678
if allFailed {
645679
return fmt.Errorf("no data was collected: %s", genErrorMsg(resultMap))
646680
} else {
647681
return err
648682
}
649683
}
684+
685+
func downloadControllerBundleFromKubernetes(antreaClientset antrea.Interface, k8sClient kubernetes.Interface, dir string) error {
686+
var errors []error
687+
controllerInfo, err := antreaClientset.CrdV1beta1().AntreaControllerInfos().Get(context.TODO(), "antrea-controller", metav1.GetOptions{})
688+
if err != nil {
689+
errors = append(errors, err)
690+
}
691+
692+
pods, err := k8sClient.CoreV1().Pods("kube-system").List(context.TODO(), metav1.ListOptions{
693+
ResourceVersion: "0",
694+
LabelSelector: "app=antrea,component=antrea-controller",
695+
})
696+
if err != nil {
697+
return err
698+
}
699+
700+
for _, pod := range pods.Items {
701+
tmpDir, err := afero.TempDir(defaultFS, "", "bundle_tmp_")
702+
if err != nil {
703+
errors = append(errors, err)
704+
continue
705+
}
706+
defer defaultFS.RemoveAll(tmpDir)
707+
708+
if controllerInfo != nil {
709+
data, err := yaml.Marshal(controllerInfo)
710+
if err != nil {
711+
errors = append(errors, err)
712+
} else {
713+
err = afero.WriteFile(defaultFS, filepath.Join(tmpDir, "controllerinfo"), data, 0644)
714+
errors = append(errors, err)
715+
}
716+
}
717+
718+
err = downloadPodLogs(k8sClient, "controller", pod.Namespace, pod.Name, []string{"antrea-controller"}, dir, tmpDir)
719+
errors = append(errors, err)
720+
}
721+
return utilerror.NewAggregate(errors)
722+
}
723+
724+
func downloadAgentBundleFromKubernetes(antreaClientset antrea.Interface, k8sClient kubernetes.Interface, failedNodes []string, dir string) error {
725+
var errors []error
726+
agentInfoList, err := antreaClientset.CrdV1beta1().AntreaAgentInfos().List(context.TODO(), metav1.ListOptions{ResourceVersion: "0"})
727+
if err != nil {
728+
// in case when there are no agentInfo exist, but there are still pods running. we can ignore this error
729+
// and get pod logs
730+
errors = append(errors, err)
731+
}
732+
733+
agentInfoMap := map[string]v1beta1.AntreaAgentInfo{}
734+
for _, agentInfo := range agentInfoList.Items {
735+
agentInfoMap[agentInfo.Name] = agentInfo
736+
}
737+
738+
pods, err := k8sClient.CoreV1().Pods("kube-system").List(context.TODO(), metav1.ListOptions{
739+
ResourceVersion: "0",
740+
LabelSelector: "app=antrea,component=antrea-agent",
741+
})
742+
if err != nil {
743+
return err
744+
}
745+
746+
failedNodesSet := sets.NewString(failedNodes...)
747+
748+
for _, pod := range pods.Items {
749+
750+
tmpDir, err := afero.TempDir(defaultFS, "", "bundle_tmp_")
751+
if err != nil {
752+
errors = append(errors, err)
753+
continue
754+
}
755+
defer defaultFS.RemoveAll(tmpDir)
756+
757+
if !failedNodesSet.Has(pod.Spec.NodeName) {
758+
continue
759+
}
760+
761+
if agentInfo, ok := agentInfoMap[pod.Spec.NodeName]; ok {
762+
data, err := yaml.Marshal(agentInfo)
763+
if err != nil {
764+
errors = append(errors, err)
765+
}
766+
err = afero.WriteFile(defaultFS, filepath.Join(tmpDir, "agentinfo"), data, 0644)
767+
errors = append(errors, err)
768+
}
769+
770+
var containerNames []string
771+
for _, container := range pod.Spec.Containers {
772+
containerNames = append(containerNames, container.Name)
773+
}
774+
for _, container := range pod.Spec.InitContainers {
775+
containerNames = append(containerNames, container.Name)
776+
}
777+
778+
err = downloadPodLogs(k8sClient, "agent_"+pod.Spec.NodeName, pod.Namespace, pod.Name, containerNames, dir, tmpDir)
779+
errors = append(errors, err)
780+
781+
}
782+
return utilerror.NewAggregate(errors)
783+
784+
}
785+
786+
func downloadPodLogs(k8sClient kubernetes.Interface, comp string, namespace string, podName string, containers []string, dir string, tmpDir string) error {
787+
var errors []error
788+
789+
for _, containerName := range containers {
790+
containerDirName := containerName
791+
if strings.HasPrefix(containerName, "antrea-") {
792+
containerDirName = strings.ReplaceAll(containerName, "antrea-", "")
793+
}
794+
795+
podLogDir := filepath.Join(tmpDir, "logs", containerDirName)
796+
err := os.MkdirAll(podLogDir, 0755)
797+
if err != nil {
798+
return err
799+
}
800+
801+
fileName := filepath.Join(podLogDir, containerName+".log")
802+
f, err := os.Create(fileName)
803+
if err != nil {
804+
errors = append(errors, err)
805+
continue
806+
}
807+
808+
logOption := &corev1.PodLogOptions{
809+
Container: containerName,
810+
}
811+
logs := k8sClient.CoreV1().Pods(namespace).GetLogs(podName, logOption)
812+
logStream, err := logs.Stream(context.TODO())
813+
if err != nil {
814+
errors = append(errors, err)
815+
f.Close()
816+
continue
817+
}
818+
819+
_, err = io.Copy(f, logStream)
820+
errors = append(errors, err)
821+
err = logStream.Close()
822+
errors = append(errors, err)
823+
f.Close()
824+
}
825+
826+
gzFileName := filepath.Join(dir, comp+".tar.gz")
827+
828+
f, err := os.Create(gzFileName)
829+
if err != nil {
830+
errors = append(errors, err)
831+
} else {
832+
_, err := compress.PackDir(tmpDir, f)
833+
errors = append(errors, err)
834+
f.Close()
835+
}
836+
837+
return utilerror.NewAggregate(errors)
838+
839+
}

pkg/util/compress/compress.go

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// Copyright 2022 Antrea Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package compress
16+
17+
import (
18+
"archive/tar"
19+
"compress/gzip"
20+
"crypto/sha256"
21+
"io"
22+
"os"
23+
"path/filepath"
24+
"strings"
25+
26+
"github.com/spf13/afero"
27+
)
28+
29+
var (
30+
// defaultFS is the filesystem PackDir walks and reads files from; it is
// backed by the real OS filesystem.
defaultFS = afero.NewOsFs()
31+
)
32+
33+
// PackDir will compress a dir to a tar.gz file
34+
func PackDir(dir string, writer io.Writer) ([]byte, error) {
35+
hash := sha256.New()
36+
gzWriter := gzip.NewWriter(io.MultiWriter(hash, writer))
37+
defer gzWriter.Close()
38+
targzWriter := tar.NewWriter(gzWriter)
39+
defer targzWriter.Close()
40+
err := afero.Walk(defaultFS, dir, func(filePath string, info os.FileInfo, err error) error {
41+
if err != nil {
42+
return err
43+
}
44+
if !info.Mode().IsRegular() || info.IsDir() {
45+
return nil
46+
}
47+
header, err := tar.FileInfoHeader(info, info.Name())
48+
if err != nil {
49+
return err
50+
}
51+
header.Name = strings.TrimPrefix(strings.ReplaceAll(filePath, dir, ""), string(filepath.Separator))
52+
err = targzWriter.WriteHeader(header)
53+
if err != nil {
54+
return err
55+
}
56+
f, err := defaultFS.Open(filePath)
57+
if err != nil {
58+
return err
59+
}
60+
defer f.Close()
61+
_, err = io.Copy(targzWriter, f)
62+
return err
63+
})
64+
if err != nil {
65+
return nil, err
66+
}
67+
68+
return hash.Sum(nil), nil
69+
}

0 commit comments

Comments
 (0)