Step: ovn-sdn-migration-rollback

This step rolls back the networkType from OVNKubernetes to OpenShiftSDN in a running cluster.

Container image used for this step: ocp/cli-jq:latest

ocp/cli-jq:latest resolves to an image imported from the specified imagestream tag on the build farm (documentation).

Environment

This step exposes no environment variables except the defaults.

Source Code

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/bin/bash
# Shell options for the whole script: trace every command, abort on the first
# failing command, on any expansion of an unset variable, and on a failure in
# any stage of a pipeline.
set -o xtrace
set -euo pipefail

# Network type used by the patches below. An exported, non-empty TARGET wins;
# otherwise it falls back to OpenShiftSDN.
: "${TARGET:=OpenShiftSDN}"

# Pause the master and worker MachineConfigPools (spec.paused=true) before the
# network configuration is touched.
for pool in master worker; do
  oc patch MachineConfigPool "${pool}" --type='merge' --patch '{"spec":{"paused":true}}'
done

# Reset the spec.migration before we can set it to other value
oc patch Network.operator.openshift.io cluster --type='merge' --patch '{"spec":{"migration":null}}'

# Wait (at most 60s) until CNO has cleaned the migration field, i.e. until the
# output of `oc get network -o yaml` no longer contains the text "migration".
#
# The heredoc delimiter is quoted so the loop body reaches the child bash
# verbatim. The manifest is captured first and only inspected when `oc` itself
# succeeded: piping `oc` straight into grep would turn a failed API call
# (empty output, hence no match) into a false "migration already cleaned".
timeout 60s bash <<'EOT'
until
  manifest=$(oc get network -o yaml) && ! grep -q migration <<<"${manifest}"
do
  echo "migration field is not cleaned by CNO"
  sleep 3
done
EOT

# Allow rollback.
# First set spec.migration.networkType on the Network.operator.openshift.io CR,
# then change spec.networkType on the Network.config.openshift.io CR; the
# latter triggers the machine config update by MCO.
printf -v migration_patch '{"spec":{"migration":{"networkType":"%s"}}}' "${TARGET}"
printf -v network_type_patch '{"spec":{"networkType":"%s"}}' "${TARGET}"
oc patch Network.operator.openshift.io cluster --type='merge' --patch "${migration_patch}"
oc patch Network.config.openshift.io cluster --type='merge' --patch "${network_type_patch}"

# Give the network cluster operator up to 30s to report Progressing=True.
oc wait co network --for='condition=PROGRESSING=True' --timeout=30s
# Wait until the multus pods are restarted (bounded to 300s)
timeout 300 oc rollout status ds/multus -n openshift-multus

# Reboot all the nodes.
# For every line of `oc get pod` in the openshift-machine-config-operator
# namespace that contains "daemon", take the first column (the pod name) and
# run `chroot /rootfs shutdown -r +1` in that pod, i.e. schedule a reboot one
# minute from now.
# `xargs -I {}` is the supported spelling of the deprecated `xargs -i`.
# The grep stage is kept on purpose: with pipefail/errexit active, finding no
# matching pod aborts the script instead of silently rebooting nothing.
oc get pod -n openshift-machine-config-operator \
  | grep daemon \
  | awk '{print $1}' \
  | xargs -I {} oc rsh -n openshift-machine-config-operator {} chroot /rootfs shutdown -r +1

# Wait until all nodes reboot and the api-server is unreachable.
# 65s covers the one-minute shutdown delay scheduled above.
sleep 65

# Wait for nodes come back: poll for up to 1800s, each round giving
# `oc wait` 10s to see every node Ready.
timeout 1800s bash <<'EOT'
until
  oc wait node --all --for condition=ready --timeout=10s
do
  echo "nodes not ready"
  sleep 10
done
EOT

# Resume MCPs after reboot.
# Both patches are retried together every 10s (for up to 1800s) until the
# master and then the worker pool have been set to spec.paused=false.
timeout 1800s bash <<'EOT'
unpause_pools() {
  local pool
  for pool in master worker; do
    oc patch MachineConfigPool "${pool}" --type='merge' --patch '{"spec":{"paused":false}}' || return 1
  done
}

until unpause_pools; do
  sleep 10
done
EOT

# Give every pool up to 300s to report Updating=True.
oc wait mcp --all --for='condition=UPDATING=True' --timeout=300s

# Wait until MCO finishes its work or the 2700s (45 min) timeout is reached.
# A round succeeds only when every pool is Updated=True, Updating=False and
# Degraded=False; the conditions are checked in that order and the round stops
# at the first one that is not met.
timeout 2700s bash <<'EOT'
pools_settled() {
  local cond
  for cond in UPDATED=True UPDATING=False DEGRADED=False; do
    oc wait mcp --all --for="condition=${cond}" --timeout=10s || return 1
  done
}

until pools_settled; do
  sleep 10
  echo "Some MachineConfigPool Degraded=True,Progressing=True,or Available=False";
done
EOT

# Check all cluster operators back to normal: poll for up to 2700s until every
# ClusterOperator is Available=True, Progressing=False and Degraded=False.
timeout 2700s bash <<'EOT'
until
  oc wait co --all --for='condition=AVAILABLE=True' --timeout=10s &&
  oc wait co --all --for='condition=PROGRESSING=False' --timeout=10s &&
  oc wait co --all --for='condition=DEGRADED=False' --timeout=10s
do
  sleep 10
  # A failed round means at least one operator is still in one of these
  # unhealthy states (the message previously said Degraded=False by mistake).
  echo "Some ClusterOperators Degraded=True,Progressing=True,or Available=False"
done
EOT

# Print the final ClusterOperator table.
oc get co

Properties

Property Value Description
Resource requests (cpu) 10m Used in .resources.requests of the pod running this step.
Resource requests (memory) 100Mi Used in .resources.requests of the pod running this step.

GitHub Link:

https://github.com/openshift/release/blob/master/ci-operator/step-registry/ovn/sdn-migration-rollback/ovn-sdn-migration-rollback-ref.yaml

Owners:

Approvers:

Reviewers:

Source code for this page located on GitHub