@@ -15,11 +15,11 @@ import (
15
15
)
16
16
17
17
func (s * server ) checkAndRepairPods () {
18
- var repairFailPods map [string ]string
19
18
for {
20
- repairFailPods = s .doCheckAndRepairPods ()
19
+ repairFailPods : = s .doCheckAndRepairPods ()
21
20
if len (repairFailPods ) == 0 {
22
- break
21
+ time .Sleep (time .Second * 10 )
22
+ continue
23
23
}
24
24
for _ , pod := range repairFailPods {
25
25
log .Error ().Msgf (`fail to check and repair pod: %s` , pod )
@@ -40,8 +40,13 @@ func (s *server) doCheckAndRepairPods() map[string]string {
40
40
allPodsByAddr [pod .Status .PodIP ] = fmt .Sprintf (`%s/%s` , pod .Namespace , pod .Name )
41
41
}
42
42
43
- netnsDirs := []string {volume .Netns .MountPath , volume .SysProc .MountPath }
44
- for _ , netnsDir := range netnsDirs {
43
+ log .Debug ().Msgf ("monitoredPodsByAddr Count: %d" , len (monitoredPodsByAddr ))
44
+ log .Debug ().Msgf ("allPodsByAddr Count: %d" , len (allPodsByAddr ))
45
+
46
+ for _ , netnsDir := range volume .Netns {
47
+ if len (monitoredPodsByAddr ) == 0 {
48
+ break
49
+ }
45
50
rd , err := os .ReadDir (netnsDir )
46
51
if err != nil {
47
52
log .Debug ().Err (err ).Msg (netnsDir )
@@ -56,27 +61,27 @@ func (s *server) doCheckAndRepairPods() map[string]string {
56
61
}
57
62
58
63
if doErr := netNS .Do (func (_ ns.NetNS ) error {
59
- ifaces , ifaceErr := net .Interfaces ()
60
- if ifaceErr != nil {
61
- log .Debug ().Err (ifaceErr ).Msg (nsName )
62
- return nil
63
- }
64
- for _ , iface := range ifaces {
64
+ if iface , ifaceErr := net .InterfaceByName (podEth0 ); ifaceErr == nil {
65
65
if (iface .Flags & net .FlagLoopback ) == 0 && (iface .Flags & net .FlagUp ) != 0 {
66
66
if addrs , addrErr := iface .Addrs (); addrErr == nil {
67
67
for _ , addr := range addrs {
68
68
addrStr := addr .String ()
69
69
addrStr = addrStr [0 :strings .Index (addrStr , `/` )]
70
+ log .Debug ().Msgf ("netns Addr:%s %s" , iface .Name , addrStr )
70
71
if pod , exists := monitoredPodsByAddr [addrStr ]; exists {
72
+ log .Debug ().Msgf ("monitoredPodsByAddr:%s" , addrStr )
71
73
if attachErr := tc .AttachBPFProg (maps .SysMesh , iface .Name , true , true ); attachErr != nil {
72
74
return fmt .Errorf (`%s %s` , pod , attachErr .Error ())
73
75
}
76
+ log .Debug ().Msgf ("monitoredPodsByAddr:%s attach success" , addrStr )
74
77
delete (monitoredPodsByAddr , addrStr )
75
78
delete (allPodsByAddr , addrStr )
76
79
} else if pod , exists := allPodsByAddr [addrStr ]; exists {
80
+ log .Debug ().Msgf ("allPodsByAddr:%s" , addrStr )
77
81
if detachErr := tc .DetachBPFProg (maps .SysMesh , iface .Name , true , true ); detachErr != nil {
78
82
return fmt .Errorf (`%s %s` , pod , detachErr .Error ())
79
83
}
84
+ log .Debug ().Msgf ("allPodsByAddr:%s detach success" , addrStr )
80
85
delete (allPodsByAddr , addrStr )
81
86
}
82
87
}
@@ -89,19 +94,18 @@ func (s *server) doCheckAndRepairPods() map[string]string {
89
94
}
90
95
}
91
96
}
97
+ log .Debug ().Msgf ("monitoredPodsByAddr Attach Fail Count: %d" , len (monitoredPodsByAddr ))
92
98
return monitoredPodsByAddr
93
99
}
94
100
95
101
func (s * server ) checkAndResetPods () {
96
- var resetFailPods map [ string ] string
102
+ retries := 3
97
103
for {
98
- resetFailPods = s .doCheckAndResetPods ()
99
- if len (resetFailPods ) == 0 {
104
+ _ = s .doCheckAndResetPods ()
105
+ retries --
106
+ if retries < 0 {
100
107
break
101
108
}
102
- for _ , pod := range resetFailPods {
103
- log .Error ().Msgf (`fail to check and reset pod: %s` , pod )
104
- }
105
109
time .Sleep (time .Second * 3 )
106
110
}
107
111
}
@@ -112,8 +116,12 @@ func (s *server) doCheckAndResetPods() map[string]string {
112
116
for _ , pod := range pods {
113
117
allPodsByAddr [pod .Status .PodIP ] = fmt .Sprintf (`%s/%s` , pod .Namespace , pod .Name )
114
118
}
115
- netnsDirs := []string {volume .Netns .MountPath , volume .SysProc .MountPath }
116
- for _ , netnsDir := range netnsDirs {
119
+ log .Debug ().Msgf ("allPodsByAddr Count: %d" , len (allPodsByAddr ))
120
+
121
+ for _ , netnsDir := range volume .Netns {
122
+ if len (allPodsByAddr ) == 0 {
123
+ break
124
+ }
117
125
rd , err := os .ReadDir (netnsDir )
118
126
if err != nil {
119
127
log .Debug ().Err (err ).Msg (netnsDir )
@@ -128,21 +136,18 @@ func (s *server) doCheckAndResetPods() map[string]string {
128
136
}
129
137
130
138
if nsErr = netNS .Do (func (_ ns.NetNS ) error {
131
- ifaces , ifaceErr := net .Interfaces ()
132
- if ifaceErr != nil {
133
- log .Debug ().Err (ifaceErr ).Msg (nsName )
134
- return nil
135
- }
136
- for _ , iface := range ifaces {
139
+ if iface , ifaceErr := net .InterfaceByName (podEth0 ); ifaceErr == nil {
137
140
if (iface .Flags & net .FlagLoopback ) == 0 && (iface .Flags & net .FlagUp ) != 0 {
138
141
if addrs , addrErr := iface .Addrs (); addrErr == nil {
139
142
for _ , addr := range addrs {
140
143
addrStr := addr .String ()
141
144
addrStr = addrStr [0 :strings .Index (addrStr , `/` )]
145
+ log .Debug ().Msgf ("netns Addr:%s %s" , iface .Name , addrStr )
142
146
if pod , exists := allPodsByAddr [addrStr ]; exists {
143
147
if detachErr := tc .DetachBPFProg (maps .SysMesh , iface .Name , true , true ); detachErr != nil {
144
148
return fmt .Errorf (`%s %s` , pod , detachErr .Error ())
145
149
}
150
+ log .Debug ().Msgf ("allPodsByAddr:%s detach success" , addrStr )
146
151
delete (allPodsByAddr , addrStr )
147
152
}
148
153
}
0 commit comments