Skip to content

Commit 1140c8d

Browse files
shalemanshaleman
authored and
shaleman
committed
fix consul lock issue
fix master info consul service fix import latest objdb add a test for network partition godep import disable reload test fix build break Fix merge error
1 parent c502669 commit 1140c8d

File tree

16 files changed

+263
-88
lines changed

16 files changed

+263
-88
lines changed

Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
# docker run --net=host <image> -host-label=<label>
2121
##
2222

23-
FROM golang:1.5.1
23+
FROM golang:1.6.1
2424
MAINTAINER Madhav Puri <[email protected]> (@mapuri)
2525

2626

Godeps/Godeps.json

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

netmaster/daemon/daemon.go

+11-11
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,12 @@ func (d *MasterDaemon) Init() {
8686
if err != nil {
8787
log.Fatalf("Failed to init resource manager. Error: %s", err)
8888
}
89+
90+
// Create an objdb client
91+
d.objdbClient, err = objdb.NewClient(d.ClusterStore)
92+
if err != nil {
93+
log.Fatalf("Error connecting to state store: %v. Err: %v", d.ClusterStore, err)
94+
}
8995
}
9096

9197
func (d *MasterDaemon) registerService() {
@@ -105,7 +111,7 @@ func (d *MasterDaemon) registerService() {
105111
}
106112

107113
// Register the node with service registry
108-
err = master.ObjdbClient.RegisterService(srvInfo)
114+
err = d.objdbClient.RegisterService(srvInfo)
109115
if err != nil {
110116
log.Fatalf("Error registering service. Err: %v", err)
111117
}
@@ -120,7 +126,7 @@ func (d *MasterDaemon) registerService() {
120126
}
121127

122128
// Register the node with service registry
123-
err = master.ObjdbClient.RegisterService(srvInfo)
129+
err = d.objdbClient.RegisterService(srvInfo)
124130
if err != nil {
125131
log.Fatalf("Error registering service. Err: %v", err)
126132
}
@@ -136,7 +142,7 @@ func (d *MasterDaemon) agentDiscoveryLoop() {
136142
watchStopCh := make(chan bool, 1)
137143

138144
// Start a watch on netplugin service
139-
err := master.ObjdbClient.WatchService("netplugin", agentEventCh, watchStopCh)
145+
err := d.objdbClient.WatchService("netplugin", agentEventCh, watchStopCh)
140146
if err != nil {
141147
log.Fatalf("Could not start a watch on netplugin service. Err: %v", err)
142148
}
@@ -230,7 +236,7 @@ func (d *MasterDaemon) runLeader() {
230236
defer d.listenerMutex.Unlock()
231237

232238
// Create a new api controller
233-
d.apiController = objApi.NewAPIController(router, d.ClusterStore)
239+
d.apiController = objApi.NewAPIController(router, d.objdbClient, d.ClusterStore)
234240

235241
//Restore state from clusterStore
236242
d.restoreCache()
@@ -342,17 +348,11 @@ func (d *MasterDaemon) RunMasterFsm() {
342348
log.Fatalf("Error creating ofnet master")
343349
}
344350

345-
// Create an objdb client
346-
master.ObjdbClient, err = objdb.NewClient(d.ClusterStore)
347-
if err != nil {
348-
log.Fatalf("Error connecting to state store: %v. Err: %v", d.ClusterStore, err)
349-
}
350-
351351
// Register all existing netplugins in the background
352352
go d.agentDiscoveryLoop()
353353

354354
// Create the lock
355-
leaderLock, err = master.ObjdbClient.NewLock("netmaster/leader", localIP, leaderLockTTL)
355+
leaderLock, err = d.objdbClient.NewLock("netmaster/leader", localIP, leaderLockTTL)
356356
if err != nil {
357357
log.Fatalf("Could not create leader lock. Err: %v", err)
358358
}

netmaster/master/netmaster.go

-4
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ import (
2828
"github.com/contiv/netplugin/utils/netutils"
2929

3030
log "github.com/Sirupsen/logrus"
31-
"github.com/contiv/objdb"
3231
"github.com/samalba/dockerclient"
3332
)
3433

@@ -45,9 +44,6 @@ type nmRunTimeConf struct {
4544

4645
var masterRTCfg nmRunTimeConf
4746

48-
// ObjdbClient global
49-
var ObjdbClient objdb.API
50-
5147
// SetClusterMode sets the cluster mode for the contiv plugin
5248
func SetClusterMode(cm string) error {
5349
switch cm {

netmaster/objApi/apiController.go

+6-3
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"github.com/contiv/netplugin/netmaster/mastercfg"
3030
"github.com/contiv/netplugin/utils"
3131
"github.com/contiv/netplugin/utils/netutils"
32+
"github.com/contiv/objdb"
3233
"github.com/contiv/objdb/modeldb"
3334

3435
"encoding/json"
@@ -42,7 +43,8 @@ import (
4243

4344
// APIController stores the api controller state
4445
type APIController struct {
45-
router *mux.Router
46+
router *mux.Router
47+
objdbClient objdb.API // Objdb client
4648
}
4749

4850
// BgpInspect is bgp inspect struct
@@ -54,9 +56,10 @@ type BgpInspect struct {
5456
var apiCtrler *APIController
5557

5658
// NewAPIController creates a new controller
57-
func NewAPIController(router *mux.Router, storeURL string) *APIController {
59+
func NewAPIController(router *mux.Router, objdbClient objdb.API, storeURL string) *APIController {
5860
ctrler := new(APIController)
5961
ctrler.router = router
62+
ctrler.objdbClient = objdbClient
6063

6164
// init modeldb
6265
modeldb.Init(storeURL)
@@ -1841,7 +1844,7 @@ func (ac *APIController) BgpGetOper(bgp *contivModel.BgpInspect) error {
18411844
var obj *BgpInspect
18421845
var host string
18431846

1844-
srvList, err := master.ObjdbClient.GetService("netplugin")
1847+
srvList, err := ac.objdbClient.GetService("netplugin")
18451848
if err != nil {
18461849
log.Errorf("Error getting netplugin nodes. Err: %v", err)
18471850
return err

netmaster/objApi/objapi_test.go

+8-1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import (
3535
"github.com/contiv/netplugin/state"
3636
"github.com/contiv/netplugin/utils"
3737
"github.com/contiv/netplugin/utils/netutils"
38+
"github.com/contiv/objdb"
3839
"github.com/contiv/ofnet"
3940
etcdclient "github.com/coreos/etcd/client"
4041
"github.com/gorilla/mux"
@@ -80,8 +81,14 @@ func TestMain(m *testing.M) {
8081
s.HandleFunc("/plugin/createEndpoint", makeHTTPHandler(master.CreateEndpointHandler))
8182
s = router.Methods("Get").Subrouter()
8283

84+
// create objdb client
85+
objdbClient, err := objdb.NewClient("etcd://127.0.0.1:2379")
86+
if err != nil {
87+
log.Fatalf("Error connecting to state store: etcd://127.0.0.1:2379. Err: %v", err)
88+
}
89+
8390
// Create a new api controller
84-
apiController = NewAPIController(router, "etcd://127.0.0.1:2379")
91+
apiController = NewAPIController(router, objdbClient, "etcd://127.0.0.1:2379")
8592

8693
ofnetMaster := ofnet.NewOfnetMaster("127.0.0.1", ofnet.OFNET_MASTER_PORT)
8794
if ofnetMaster == nil {

scripts/netContain/BuildContainer.sh

+11-10
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ function dockerBuildIt {
55

66
if [[ $imgId =~ [0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f] ]]; then
77
echo "$2 Image has been built with ID $imgId"
8+
docker tag $imgId $2
89
return 0
910
fi
1011
echo "$2 Image was not built properly"
@@ -28,25 +29,25 @@ fi
2829

2930
sudo modprobe openvswitch
3031

31-
imgId="Contiv"
32-
dockerBuildIt . $imgId
32+
imgName="Contiv"
33+
dockerBuildIt . $imgName
3334
if [ $? != 0 ]; then
3435
echo "Failed building Contiv Image Bailing out Err $?"
3536
exit
3637
fi
3738

38-
docker run --name=$imgId $imgId 2> /dev/null
39+
docker run --name=$imgName $imgName 2> /dev/null
3940

4041
echo "Copying the Contiv Binaries from the built container"
41-
docker cp $imgId:/go/bin/netplugin scripts/netContain/
42-
docker cp $imgId:/go/bin/netmaster scripts/netContain/
43-
docker cp $imgId:/go/bin/netctl scripts/netContain/
44-
docker cp $imgId:/go/bin/contivk8s scripts/netContain/
42+
docker cp $imgName:/go/bin/netplugin scripts/netContain/
43+
docker cp $imgName:/go/bin/netmaster scripts/netContain/
44+
docker cp $imgName:/go/bin/netctl scripts/netContain/
45+
docker cp $imgName:/go/bin/contivk8s scripts/netContain/
4546

4647

4748
echo "Removing Intermediate Contiv Container"
48-
docker rm -f $imgId
49-
docker rmi -f $imgId
49+
docker rm -f $imgName
50+
docker rmi -f $imgName
5051

5152

5253
dockerBuildIt scripts/netContain contivbase
@@ -56,4 +57,4 @@ if [ $? != 0 ]; then
5657
fi
5758

5859
#echo "Build Contiv Image $imgId ..., tagging it as contivbase"
59-
#docker tag $imgId contivbase
60+
# docker tag contivbase netplugin

scripts/netContain/Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ MAINTAINER Rajesh Nataraja <[email protected]>
2727
#Install the Open vSwitch package on this ubuntu container.
2828
RUN apt update
2929
RUN apt --assume-yes install openvswitch-switch
30-
RUN apt --assume-yes install net-tools
30+
RUN apt --assume-yes install net-tools iptables
3131

3232
#Copy the Net Plugin Binaries and the Scripts required to
3333
# start the Net Plugin and Net Master.

scripts/netContain/contivNet.sh

+15-11
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ cstore=""
99
cmode="bridge"
1010
netmaster=false
1111
netplugin=false
12-
vlan_if=""
12+
vlan_if="invalid"
1313

1414
#This needs to be fixed, we can't rely on the value being supplied from
1515
# parameters; an explosion of parameters is not a great solution
@@ -80,11 +80,12 @@ fi
8080

8181
if [ $netmaster == true ]; then
8282
echo "Starting Netmaster "
83+
mkdir -p /var/contiv/log/
8384
while [ true ]; do
8485
if [ "$cstore" != "" ]; then
85-
/contiv/bin/netmaster -cluster-mode $plugin -cluster-store $cstore &> /var/contiv/log/netmaster.log
86+
/contiv/bin/netmaster -cluster-mode $plugin -dns-enable=false -cluster-store $cstore &> /var/contiv/log/netmaster.log
8687
else
87-
/contiv/bin/netmaster -cluster-mode $plugin &> /var/contiv/log/netmaster.log
88+
/contiv/bin/netmaster -cluster-mode $plugin -dns-enable=false &> /var/contiv/log/netmaster.log
8889
fi
8990
echo "CRITICAL : Net Master has exited, Respawn in 5"
9091
sleep 5
@@ -93,17 +94,20 @@ fi
9394

9495
if [ $netplugin == true ]; then
9596

96-
97-
if [ "$vtep_ip" == "" ] || [ "$vlan_if" == "" ]; then
98-
echo "Net Plugin Cannot be started without specifying the VETP or VLAN Interface"
99-
exit
100-
fi
101-
97+
mkdir -p /var/contiv/log/
10298
while [ true ]; do
10399
if [ "$cstore" != "" ]; then
104-
/contiv/bin/netplugin -cluster-store $cstore -vtep-ip $vtep_ip -vlan-if $vlan_if -fwd-mode $fwd_mode -plugin-mode $plugin &> /var/contiv/log/netplugin.log
100+
if [ "$vtep_ip" != "" ]; then
101+
/contiv/bin/netplugin -cluster-store $cstore -vtep-ip $vtep_ip -vlan-if $vlan_if -plugin-mode $plugin &> /var/contiv/log/netplugin.log
102+
else
103+
/contiv/bin/netplugin -cluster-store $cstore -vlan-if $vlan_if -plugin-mode $plugin &> /var/contiv/log/netplugin.log
104+
fi
105105
else
106-
/contiv/bin/netplugin -vtep-ip $vtep_ip -vlan-if $vlan_if -fwd-mode $fwd_mode -plugin-mode $plugin &> /var/contiv/log/netplugin.log
106+
if [ "$vtep_ip" != "" ]; then
107+
/contiv/bin/netplugin -vtep-ip $vtep_ip -vlan-if $vlan_if -plugin-mode $plugin &> /var/contiv/log/netplugin.log
108+
else
109+
/contiv/bin/netplugin -vlan-if $vlan_if -plugin-mode $plugin &> /var/contiv/log/netplugin.log
110+
fi
107111
fi
108112
echo "CRITICAL : Net Plugin has exited, Respawn in 5"
109113
sleep 5

test/systemtests/node_test.go

+43
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,49 @@ func (n *node) restartClusterStore() error {
192192
return nil
193193
}
194194

195+
// bring down an interface on a node
196+
func (n *node) bringDownIf(ifname string) error {
197+
logrus.Infof("Bringing down interface %s on node %s", ifname, n.Name())
198+
out, err := n.runCommand(fmt.Sprintf("sudo ifconfig %s down", ifname))
199+
if err != nil {
200+
logrus.Errorf("Failed to bring down interface %s on node %v", ifname, n.Name())
201+
logrus.Println(out)
202+
return err
203+
}
204+
205+
return nil
206+
}
207+
208+
// bring up an interface on a node with the given address (/24), making up to 3 attempts until the link reports RUNNING
209+
func (n *node) bringUpIf(ifname, ipAddr string) error {
210+
logrus.Infof("Bringing up interface %s with addr %s/24 on node %s", ifname, ipAddr, n.Name())
211+
212+
for i := 0; i < 3; i++ {
213+
// bring up the interface
214+
out, err := n.runCommand(fmt.Sprintf("sudo ifconfig %s %s/24 up", ifname, ipAddr))
215+
if err != nil {
216+
logrus.Errorf("Failed to bring up interface %s on node %v", ifname, n.Name())
217+
logrus.Println(out)
218+
return err
219+
}
220+
221+
// verify link is up
222+
out, err = n.runCommand(fmt.Sprintf("sudo ifconfig %s", ifname))
223+
if err != nil {
224+
logrus.Errorf("Failed to check interface %s status on node %v", ifname, n.Name())
225+
logrus.Println(out)
226+
return err
227+
}
228+
229+
// if the interface is up, we are done
230+
if strings.Contains(out, "RUNNING") {
231+
return nil
232+
}
233+
}
234+
235+
return fmt.Errorf("Failed to bring up interface")
236+
}
237+
195238
func (n *node) waitForListeners() error {
196239
return n.exec.waitForListeners()
197240
}

test/systemtests/trigger_test.go

+47-2
Original file line numberDiff line numberDiff line change
@@ -130,12 +130,11 @@ func (s *systemtestSuite) TestTriggerNetpluginDisconnect(c *C) {
130130
}
131131

132132
func (s *systemtestSuite) TestTriggerNodeReload(c *C) {
133-
c.Skip("Skipping this tests temporarily")
134-
135133
// can not run this test on docker 1.10 & k8s
136134
if os.Getenv("CONTIV_DOCKER_VERSION") == "1.10.3" || s.basicInfo.Scheduler == "k8" {
137135
c.Skip("Skipping node reload test on [older docker version | consul | k8s]")
138136
}
137+
139138
network := &client.Network{
140139
TenantName: "default",
141140
NetworkName: "private",
@@ -250,6 +249,52 @@ func (s *systemtestSuite) TestTriggerClusterStoreRestart(c *C) {
250249
c.Assert(s.cli.NetworkDelete("default", "private"), IsNil)
251250
}
252251

252+
// TestTriggerNetPartition tests network partition by flapping the control interface (eth1) on each node in turn
253+
func (s *systemtestSuite) TestTriggerNetPartition(c *C) {
254+
// create network
255+
network := &client.Network{
256+
TenantName: "default",
257+
NetworkName: "private",
258+
Subnet: "10.1.1.0/24",
259+
Gateway: "10.1.1.254",
260+
Encap: "vxlan",
261+
}
262+
c.Assert(s.cli.NetworkPost(network), IsNil)
263+
264+
for i := 0; i < s.basicInfo.Iterations; i++ {
265+
containers, err := s.runContainers(s.basicInfo.Containers, false, "private", "default", nil, nil)
266+
c.Assert(err, IsNil)
267+
268+
// test ping for all containers
269+
c.Assert(s.pingTest(containers), IsNil)
270+
271+
// partition nodes one at a time
272+
for _, node := range s.nodes {
273+
nodeIP, err := node.getIPAddr("eth1")
274+
c.Assert(err, IsNil)
275+
276+
// flap the control interface
277+
c.Assert(node.bringDownIf("eth1"), IsNil)
278+
time.Sleep(50 * time.Second) // wait till sessions/locks timeout
279+
c.Assert(node.bringUpIf("eth1", nodeIP), IsNil)
280+
281+
time.Sleep(20 * time.Second)
282+
c.Assert(s.verifyVTEPs(), IsNil)
283+
284+
c.Assert(s.verifyEPs(containers), IsNil)
285+
time.Sleep(2 * time.Second)
286+
287+
// test ping for all containers
288+
c.Assert(s.pingTest(containers), IsNil)
289+
}
290+
291+
c.Assert(s.removeContainers(containers), IsNil)
292+
}
293+
294+
// delete the network
295+
c.Assert(s.cli.NetworkDelete("default", "private"), IsNil)
296+
}
297+
253298
func (s *systemtestSuite) TestTriggers(c *C) {
254299

255300
groupNames := []string{}

0 commit comments

Comments
 (0)