Skip to content

cmd/openshift-install/create: Make waitForInstallComplete() more robust #1413

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 59 additions & 33 deletions cmd/openshift-install/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"crypto/x509"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
"time"
Expand All @@ -16,6 +17,7 @@ import (
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/kubernetes"
Expand Down Expand Up @@ -316,17 +318,18 @@ func waitForBootstrapConfigMap(ctx context.Context, client *kubernetes.Clientset

// waitForInitializedCluster watches the ClusterVersion waiting for confirmation
// that the cluster has been initialized.
func waitForInitializedCluster(ctx context.Context, config *rest.Config) error {
func waitForInitializedCluster(ctx context.Context, config *rest.Config) (configv1.ClusterStatusConditionType, error) {
failing := configv1.ClusterStatusConditionType("Failing")
status := failing
timeout := 30 * time.Minute
logrus.Infof("Waiting up to %v for the cluster at %s to initialize...", timeout, config.Host)
cc, err := configclient.NewForConfig(config)
if err != nil {
return errors.Wrap(err, "failed to create a config client")
return status, errors.Wrap(err, "failed to create a config client")
}
clusterVersionContext, cancel := context.WithTimeout(ctx, timeout)
defer cancel()

failing := configv1.ClusterStatusConditionType("Failing")
var lastError string
_, err = clientwatch.UntilWithSync(
clusterVersionContext,
Expand All @@ -342,11 +345,14 @@ func waitForInitializedCluster(ctx context.Context, config *rest.Config) error {
return false, nil
}
if cov1helpers.IsStatusConditionTrue(cv.Status.Conditions, configv1.OperatorAvailable) {
status = configv1.OperatorAvailable
return true, nil
}
if cov1helpers.IsStatusConditionTrue(cv.Status.Conditions, failing) {
status = failing
lastError = cov1helpers.FindStatusCondition(cv.Status.Conditions, failing).Message
} else if cov1helpers.IsStatusConditionTrue(cv.Status.Conditions, configv1.OperatorProgressing) {
status = configv1.OperatorProgressing
lastError = cov1helpers.FindStatusCondition(cv.Status.Conditions, configv1.OperatorProgressing).Message
}
logrus.Debugf("Still waiting for the cluster to initialize: %s", lastError)
Expand All @@ -359,18 +365,21 @@ func waitForInitializedCluster(ctx context.Context, config *rest.Config) error {

if err == nil {
logrus.Debug("Cluster is initialized")
return nil
return status, nil
}

if lastError != "" {
return errors.Wrapf(err, "failed to initialize the cluster: %s", lastError)
if status == configv1.OperatorProgressing {
return status, errors.Wrapf(err, "the cluster is still initializing: %s", lastError)
}
return status, errors.Wrapf(err, "failed to initialize the cluster: %s", lastError)
}

return errors.Wrap(err, "failed to initialize the cluster")
return status, errors.Wrap(err, "failed to initialize the cluster")
}

// waitForConsole returns the console URL from the route 'console' in namespace openshift-console
func waitForConsole(ctx context.Context, config *rest.Config, directory string) (string, error) {
func waitForConsole(ctx context.Context, config *rest.Config, directory string, oneShot bool) (string, error) {
url := ""
// Need to keep these updated if they change
consoleNamespace := "openshift-console"
Expand All @@ -381,9 +390,13 @@ func waitForConsole(ctx context.Context, config *rest.Config, directory string)
}

consoleRouteTimeout := 10 * time.Minute
logrus.Infof("Waiting up to %v for the openshift-console route to be created...", consoleRouteTimeout)
consoleRouteContext, cancel := context.WithTimeout(ctx, consoleRouteTimeout)
defer cancel()
if oneShot {
logrus.Infof("Checking for the %s route...", consoleNamespace)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/s/consoleNamespace/consoleRouteName

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/s/consoleNamespace/consoleRouteName

My follow-up is waiting for the downloads route too; this choice will be a better fit then. Even without the follow-up work, I think "the main console-namespace route" isn't that bad vs. naming the route without the namespace. If we end up sticking with a single route, I'll probably write both the namespace and name.

} else {
logrus.Infof("Waiting up to %v for the %s route to be created...", consoleRouteTimeout, consoleNamespace)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/s/consoleNamespace/consoleRouteName

}
// Poll quickly, but only log the response
// once we've seen 15 of the same errors or 15 results with
// no route in a row (to show we're still alive).
Expand All @@ -393,39 +406,40 @@ func waitForConsole(ctx context.Context, config *rest.Config, directory string)
consoleRoutes, err := rc.RouteV1().Routes(consoleNamespace).List(metav1.ListOptions{})
if err == nil && len(consoleRoutes.Items) > 0 {
for _, route := range consoleRoutes.Items {
logrus.Debugf("Route found in openshift-console namespace: %s", route.Name)
logrus.Debugf("Route found in %s namespace: %s", consoleNamespace, route.Name)
if route.Name == consoleRouteName {
url = fmt.Sprintf("https://%s", route.Spec.Host)
}
}
logrus.Debug("OpenShift console route is created")
cancel()
} else if err != nil {
silenceRemaining--
if silenceRemaining == 0 {
logrus.Debugf("Still waiting for the console route: %v", err)
silenceRemaining = logDownsample
}
} else if len(consoleRoutes.Items) == 0 {
silenceRemaining--
if silenceRemaining == 0 {
logrus.Debug("Still waiting for the console route...")
silenceRemaining = logDownsample
return
}
silenceRemaining--
if silenceRemaining == 0 {
silenceRemaining = logDownsample
if err == nil {
logrus.Debugf("Still waiting for the %s route...", consoleRouteName)
} else {
logrus.Debugf("Still waiting for the %s route: %v", consoleRouteName, err)
}
}
if oneShot {
cancel()
}
}, 2*time.Second, consoleRouteContext.Done())
err = consoleRouteContext.Err()
if err != nil && err != context.Canceled {
return url, errors.Wrap(err, "waiting for openshift-console URL")
return url, errors.Wrapf(err, "waiting for the %s route", consoleNamespace)
}
if url == "" {
return url, errors.New("could not get openshift-console URL")
return url, errors.Errorf("could not get the %s route", consoleNamespace)
}
return url, nil
}

// logComplete prints info upon completion
func logComplete(directory, consoleURL string) error {
func logComplete(config *rest.Config, directory string, clusterStatus configv1.ClusterStatusConditionType, consoleURL string, complete bool) error {
absDir, err := filepath.Abs(directory)
if err != nil {
return err
Expand All @@ -436,26 +450,38 @@ func logComplete(directory, consoleURL string) error {
if err != nil {
return err
}
logrus.Info("Install complete!")
logrus.Infof("To access the cluster as the system:admin user when using 'oc', run 'export KUBECONFIG=%s'", kubeconfig)
logrus.Infof("Access the OpenShift web-console here: %s", consoleURL)
logrus.Infof("Login to the console with user: kubeadmin, password: %s", pw)
if complete {
logrus.Info("Install complete!")
logrus.Infof("To access the cluster as the system:admin user when using 'oc', run 'export KUBECONFIG=%s'", kubeconfig)
} else if clusterStatus == configv1.OperatorProgressing {
logrus.Infof("To give the cluster more time to initialize, you can run '%s wait-for install-complete'.", os.Args[0])
}
if consoleURL != "" {
logrus.Infof("Access the OpenShift web-console here: %s", consoleURL)
logrus.Infof("Login to the console with user: kubeadmin, password: %s", pw)
}
return nil
}

func waitForInstallComplete(ctx context.Context, config *rest.Config, directory string) error {
if err := waitForInitializedCluster(ctx, config); err != nil {
return err
errs := []error{}
clusterStatus, err := waitForInitializedCluster(ctx, config)
if err != nil {
errs = append(errs, err)
}

consoleURL, err := waitForConsole(ctx, config, rootOpts.dir)
consoleURL, err := waitForConsole(ctx, config, rootOpts.dir, err != nil)
if err != nil {
return err
errs = append(errs, err)
}

if err = addRouterCAToClusterCA(config, rootOpts.dir); err != nil {
return err
errs = append(errs, err)
}

if err = logComplete(config, rootOpts.dir, clusterStatus, consoleURL, utilerrors.NewAggregate(errs) == nil); err != nil {
errs = append(errs, err)
}

return logComplete(rootOpts.dir, consoleURL)
return utilerrors.NewAggregate(errs)
}