Skip to content

Commit 7c28f52

Browse files
authored
Merge pull request #7854 from AppliedIntuition/master
Fix 2 bugs in the OCI integration
2 parents 3e92831 + 8da9a7b commit 7c28f52

File tree

4 files changed

+74
-29
lines changed

4 files changed

+74
-29
lines changed

cluster-autoscaler/cloudprovider/oci/common/oci_shape.go

+21-13
Original file line numberDiff line numberDiff line change
@@ -105,23 +105,31 @@ func (osf *shapeGetterImpl) GetNodePoolShape(np *oke.NodePool, ephemeralStorage
105105
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
106106
defer cancel()
107107

108-
resp, err := osf.shapeClient.ListShapes(ctx, core.ListShapesRequest{
108+
request := core.ListShapesRequest{
109109
CompartmentId: np.CompartmentId,
110-
Limit: common.Int(500),
111-
})
112-
if err != nil {
113-
return nil, errors.Wrap(err, "unable to ListShapes")
114110
}
115111

116-
// Update the cache based on latest results
117-
for _, s := range resp.Items {
118-
osf.cache[*s.Shape] = &Shape{
119-
Name: shapeName,
120-
CPU: getFloat32(s.Ocpus) * 2, // convert ocpu to vcpu
121-
GPU: getInt(s.Gpus),
122-
MemoryInBytes: getFloat32(s.MemoryInGBs) * 1024 * 1024 * 1024,
123-
EphemeralStorageInBytes: float32(ephemeralStorage),
112+
for {
113+
resp, err := osf.shapeClient.ListShapes(ctx, request)
114+
if err != nil {
115+
return nil, errors.Wrap(err, "unable to ListShapes")
116+
}
117+
118+
// Update the cache based on latest results
119+
for _, s := range resp.Items {
120+
osf.cache[*s.Shape] = &Shape{
121+
Name: *s.Shape,
122+
CPU: getFloat32(s.Ocpus) * 2, // convert ocpu to vcpu
123+
GPU: getInt(s.Gpus),
124+
MemoryInBytes: getFloat32(s.MemoryInGBs) * 1024 * 1024 * 1024,
125+
EphemeralStorageInBytes: float32(ephemeralStorage),
126+
}
127+
}
128+
129+
if resp.OpcNextPage == nil {
130+
break
124131
}
132+
request.Page = resp.OpcNextPage
125133
}
126134

127135
// fetch value from updated cache... if it exists.

cluster-autoscaler/cloudprovider/oci/common/oci_shape_test.go

+32-15
Original file line numberDiff line numberDiff line change
@@ -6,23 +6,26 @@ package common
66

77
import (
88
"context"
9-
oke "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/vendor-internal/github.com/oracle/oci-go-sdk/v65/containerengine"
109
"reflect"
1110
"strings"
1211
"testing"
1312

13+
oke "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/vendor-internal/github.com/oracle/oci-go-sdk/v65/containerengine"
14+
1415
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/vendor-internal/github.com/oracle/oci-go-sdk/v65/common"
1516
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/vendor-internal/github.com/oracle/oci-go-sdk/v65/core"
1617
)
1718

1819
type mockShapeClient struct {
1920
err error
20-
listShapeResp core.ListShapesResponse
21+
listShapeResponses []core.ListShapesResponse
2122
getInstanceConfigResp core.GetInstanceConfigurationResponse
23+
requestCount int
2224
}
2325

2426
func (m *mockShapeClient) ListShapes(_ context.Context, _ core.ListShapesRequest) (core.ListShapesResponse, error) {
25-
return m.listShapeResp, m.err
27+
m.requestCount++
28+
return m.listShapeResponses[m.requestCount-1], m.err
2629
}
2730

2831
func (m *mockShapeClient) GetInstanceConfiguration(context.Context, core.GetInstanceConfigurationRequest) (core.GetInstanceConfigurationResponse, error) {
@@ -46,12 +49,14 @@ var instanceDetails = core.ComputeInstanceDetails{
4649

4750
var shapeClient = &mockShapeClient{
4851
err: nil,
49-
listShapeResp: core.ListShapesResponse{
50-
Items: []core.Shape{
51-
{
52-
Shape: common.String("VM.Standard2.8"),
53-
Ocpus: common.Float32(8),
54-
MemoryInGBs: common.Float32(120),
52+
listShapeResponses: []core.ListShapesResponse{
53+
{
54+
Items: []core.Shape{
55+
{
56+
Shape: common.String("VM.Standard2.8"),
57+
Ocpus: common.Float32(8),
58+
MemoryInGBs: common.Float32(120),
59+
},
5560
},
5661
},
5762
},
@@ -76,12 +81,24 @@ func TestNodePoolGetShape(t *testing.T) {
7681

7782
shapeClient := &mockShapeClient{
7883
err: nil,
79-
listShapeResp: core.ListShapesResponse{
80-
Items: []core.Shape{
81-
{
82-
Shape: common.String("VM.Standard1.2"),
83-
Ocpus: common.Float32(2),
84-
MemoryInGBs: common.Float32(16),
84+
listShapeResponses: []core.ListShapesResponse{
85+
{
86+
Items: []core.Shape{
87+
{
88+
Shape: common.String("VM.Standard1.1"),
89+
Ocpus: common.Float32(2),
90+
MemoryInGBs: common.Float32(16),
91+
},
92+
},
93+
OpcNextPage: common.String("nextPage"),
94+
},
95+
{
96+
Items: []core.Shape{
97+
{
98+
Shape: common.String("VM.Standard1.2"),
99+
Ocpus: common.Float32(2),
100+
MemoryInGBs: common.Float32(16),
101+
},
85102
},
86103
},
87104
},

cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go

+13
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,12 @@ func (m *ociManagerImpl) GetExistingNodePoolSizeViaCompute(np NodePool) (int, er
461461
if !strings.HasPrefix(*item.DisplayName, displayNamePrefix) {
462462
continue
463463
}
464+
// A node pool can fail to scale up if there's no capacity in the region. In that case, the node pool will be
465+
// returned by the API, but it will not actually exist or have an ID, so we don't want to tell the autoscaler about it.
466+
if *item.Id == "" {
467+
klog.V(4).Infof("skipping node as it doesn't have a scaled-up instance")
468+
continue
469+
}
464470
switch item.LifecycleState {
465471
case core.InstanceLifecycleStateStopped, core.InstanceLifecycleStateTerminated:
466472
klog.V(4).Infof("skipping instance is in stopped/terminated state: %q", *item.Id)
@@ -525,6 +531,13 @@ func (m *ociManagerImpl) GetNodePoolNodes(np NodePool) ([]cloudprovider.Instance
525531
var instances []cloudprovider.Instance
526532
for _, node := range nodePool.Nodes {
527533

534+
// A node pool can fail to scale up if there's no capacity in the region. In that case, the node pool will be
535+
// returned by the API, but it will not actually exist or have an ID, so we don't want to tell the autoscaler about it.
536+
if *node.Id == "" {
537+
klog.V(4).Infof("skipping node as it doesn't have a scaled-up instance")
538+
continue
539+
}
540+
528541
if node.NodeError != nil {
529542

530543
errorClass := cloudprovider.OtherErrorClass

cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager_test.go

+8-1
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@ package nodepools
66

77
import (
88
"context"
9-
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/nodepools/consts"
109
"net/http"
1110
"reflect"
1211
"testing"
1312

13+
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/nodepools/consts"
14+
1415
apiv1 "k8s.io/api/core/v1"
1516
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
1617
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/vendor-internal/github.com/oracle/oci-go-sdk/v65/common"
@@ -123,6 +124,12 @@ func TestGetNodePoolNodes(t *testing.T) {
123124
Message: common.String("blah blah quota exceeded blah blah"),
124125
},
125126
},
127+
{
128+
// This case happens if a node fails to scale up due to lack of capacity in the region.
129+
// It's not a real node, so we shouldn't return it in the list of nodes.
130+
Id: common.String(""),
131+
LifecycleState: oke.NodeLifecycleStateCreating,
132+
},
126133
},
127134
}
128135

0 commit comments

Comments
 (0)