Skip to content

Commit 2607bc5

Browse files
authored
[improve test] reboot SONiC if sanity check fails before running test case (#443)
* [improve test] reboot SONiC and try to restore a good state if the sanity check fails before calling the test * revise messages to be more descriptive and accurate, following feedback
1 parent e729f04 commit 2607bc5

File tree

7 files changed

+133
-52
lines changed

7 files changed

+133
-52
lines changed

ansible/roles/test/tasks/base_sanity.yml

+28-8
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,42 @@
11
- name: Get process information in syncd docker
22
shell: docker exec -i syncd ps aux | grep /usr/bin/syncd
33
register: ps_out
4+
ignore_errors: yes
45

56
- debug: var=ps_out.stdout_lines
67

7-
- name: Verify that syncd process is running
8-
assert: { that: "{{ ps_out.stdout_lines | length }} > 0"}
9-
10-
- name: Get oragent process information
8+
- name: Get orchagent process information
119
shell: pgrep orchagent -a
1210
register: orch_out
11+
ignore_errors: yes
1312

1413
- debug: var=orch_out.stdout_lines
1514

16-
- name: Verify that orch process is running
17-
assert: { that: "{{ orch_out.stdout_lines | length }} > 0"}
15+
- block:
16+
- name: reboot
17+
include: common_tasks/reboot_sonic.yml
18+
19+
- name: Get process information in syncd docker
20+
shell: docker exec -i syncd ps aux | grep /usr/bin/syncd
21+
register: psnew_out
22+
23+
- debug: var=psnew_out.stdout_lines
24+
25+
- name: Verify that syncd process is running
26+
assert: { that: "{{ psnew_out.stdout_lines | length }} > 0"}
27+
28+
- name: Get orchagent process information
29+
shell: pgrep orchagent -a
30+
register: orchnew_out
31+
32+
- debug: var=orchnew_out.stdout_lines
33+
34+
- name: Verify that orchagent process is running
35+
assert: { that: "{{ orchnew_out.stdout_lines | length }} > 0"}
36+
37+
when:
38+
- ({{ ps_out.stdout_lines | length }} <= 0) or ({{ orch_out.stdout_lines | length }} <= 0)
39+
- recover is defined
1840

1941
- name: Get syslog error information
2042
shell: cat /var/log/syslog |tail -n 5000 |grep -i error
@@ -23,5 +45,3 @@
2345
failed_when: false
2446

2547
- debug: var=syslog_out.stdout_lines
26-
27-
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
- name: reboot
2+
become: true
3+
shell: shutdown -r now "Warning! System is being rebooted remotely by reboot_sonic.yml"
4+
async: 1
5+
poll: 0
6+
ignore_errors: true
7+
8+
- name: pause for 1 minute before check
9+
pause: minutes=1
10+
11+
- name: Wait for switch to come back
12+
local_action:
13+
wait_for host={{ ansible_host }}
14+
port=22
15+
state=started
16+
delay=10
17+
timeout=180
18+
search_regex="OpenSSH_[\w\.]+ Debian"
19+
become: false
20+
changed_when: false
21+
22+
- name: wait for 2 minute for prcesses and interfaces to be stable
23+
pause: seconds=120

ansible/roles/test/tasks/decap.yml

+15
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,21 @@
22
# Run Decap test
33
#-----------------------------------------
44

5+
- block:
6+
- name: Set dscp_mode for decap test for broadcom
7+
set_fact:
8+
dscp_mode: pipe
9+
when:
10+
- sonic_hwsku in broadcom_hwskus
11+
- dscp_mode is not defined
12+
13+
- name: Set dscp_mode var for decap test for mellanox
14+
set_fact:
15+
dscp_mode: uniform
16+
when:
17+
- sonic_hwsku in mellanox_hwskus
18+
- dscp_mode is not defined
19+
520
- fail: msg="information about testbed missing."
621
when: (testbed_type is not defined) or
722
(dscp_mode is not defined)

ansible/roles/test/tasks/interface.yml

+26-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,32 @@
11
- name: Get interface facts
22
interface_facts: up_ports={{minigraph_ports}}
33

4-
- debug: msg="Found link down ports {{ansible_interface_link_down_ports}} "
4+
- block:
5+
- debug: msg="Found link down ports {{ansible_interface_link_down_ports}}, reload SONiC and reenable down ports"
6+
7+
- name: reboot
8+
include: common_tasks/reboot_sonic.yml
9+
10+
- name: figure out fanout switch port in case it was down
11+
conn_graph_facts: host={{ inventory_hostname }}
12+
connection: local
13+
14+
- set_fact: neighbors="{{device_conn}}"
15+
16+
- include: resume_fanout_ports.yml
17+
with_items: ansible_interface_link_down_ports
18+
19+
- name: pause and wait interface to be up
20+
pause: seconds=30
21+
22+
- name: Get interface facts
23+
interface_facts: up_ports={{minigraph_ports}}
24+
25+
when:
26+
- ansible_interface_link_down_ports | length > 0
27+
- recover is defined
28+
29+
- debug: msg="Found link down ports {{ansible_interface_link_down_ports}}"
530
when: ansible_interface_link_down_ports | length > 0
631

732
- name: Verify interfaces are up correctly

ansible/roles/test/tasks/reboot.yml

+1-22
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,5 @@
11
- name: reboot
2-
become: true
3-
shell: sleep 2 && shutdown -r now "Reboot test."
4-
async: 1
5-
poll: 0
6-
ignore_errors: true
7-
8-
- name: pause for 1 minute before check
9-
pause: minutes=1
10-
11-
- name: Wait for switch to come back
12-
local_action:
13-
wait_for host={{ ansible_host }}
14-
port=22
15-
state=started
16-
delay=10
17-
timeout=180
18-
search_regex="OpenSSH_[\w\.]+ Debian"
19-
become: false
20-
changed_when: false
21-
22-
- name: wait again, processes and interfaces are not availabe right away
23-
pause: seconds=120
2+
include: common_tasks/reboot_sonic.yml
243

254
- name: sanity check to pass
265
include: base_sanity.yml
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# This playbook tries to bring up one fanout switch port
2+
- block:
3+
- set_fact:
4+
interface: "{{item}}"
5+
6+
- debug: msg={{interface}}
7+
8+
- set_fact:
9+
peer_device: "{{neighbors[interface]['peerdevice']}}"
10+
neighbor_interface: "{{neighbors[interface]['peerport']}}"
11+
12+
- conn_graph_facts: host={{ peer_device }}
13+
connection: local
14+
15+
- set_fact:
16+
peer_host: "{{device_info['mgmtip']}}"
17+
peer_hwsku: "{{device_info['HwSku']}}"
18+
19+
- set_fact:
20+
intfs_to_exclude: "{{interface}}"
21+
22+
- name: bring up neighbor interface {{neighbor_interface}} on {{peer_host}}
23+
action: apswitch template=neighbor_interface_no_shut_single.j2
24+
args:
25+
host: "{{peer_host}}"
26+
login: "{{switch_login[hwsku_map[peer_hwsku]]}}"
27+
connection: switch
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,33 @@
1+
### Playbook that calls an individual test case by the name defined in roles/test/vars/testcases.yml
2+
#
13
- debug: msg="!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
24
- debug: msg="!!!!!!!!!!!!!!!!!!!! start to run test {{ testcase_name }} !!!!!!!!!!!!!!!!!!!!"
35
- debug: msg="!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
46

57
- name: do basic sanity check before each test
68
include: base_sanity.yml
9+
vars:
10+
recover: true
711

812
- name: validate all interfaces is up
913
include: interface.yml
14+
vars:
15+
recover: true
1016

11-
########### if your test playbook requires more extra_vars than ptf_host and testbed_type, you may specify them here
12-
########### ptf_host and testbed_type are handled by default
13-
########### configure extra vars if your testcases need more vars
14-
########### or when you call the playbook, you have to specify your extra_vars
15-
- block:
16-
- name: Set dscp_mode for decap test for broadcom
17-
set_fact:
18-
dscp_mode: pipe
19-
when:
20-
- sonic_hwsku in broadcom_hwskus
21-
- dscp_mode is not defined
22-
23-
- name: Set dscp_mode var for decap test for mellanox
24-
set_fact:
25-
dscp_mode: uniform
26-
when:
27-
- sonic_hwsku in mellanox_hwskus
28-
- dscp_mode is not defined
29-
30-
- debug: var=testcases[testcase_name]['execvars']
31-
when: testcases[testcase_name]['execvars'] is defined
17+
### By default, when calling a test case by name, we pass 'testbed_type', 'ptf_host', and 'dut_name' (ansible_hostname) down to the test playbook.
18+
### If your test playbook requires more extra vars than the defaults, please make sure you handle them correctly within the test playbook.
19+
- debug: var=testcases[testcase_name]['required_vars']
20+
when: testcases[testcase_name]['required_vars'] is defined
3221

3322
- name: run test case {{ testcases[testcase_name]['filename'] }} file
3423
include: "{{ testcases[testcase_name]['filename'] }}"
3524

3625
- name: do basic sanity check after each test
3726
include: base_sanity.yml
3827

28+
- name: validate all interfaces are up after test
29+
include: interface.yml
30+
3931
- debug: msg="!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
4032
- debug: msg="!!!!!!!!!!!!!!!!!!!! end running test {{ testcase_name }} !!!!!!!!!!!!!!!!!!!!"
4133
- debug: msg="!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"

0 commit comments

Comments
 (0)