Skip to content

Commit 8c82f1c

Browse files
committed
CP-54207: Move VBD_attach outside of VM migrate downtime
VBDs can be attached to multiple VMs, so now that VBD_plug has been split into VBD_attach and VBD_activate, the attach can happen outside of the VM migrate downtime. This doesn't change the overall duration of the migration, but it can reduce the downtime by several seconds.

Signed-off-by: Steven Woods <[email protected]>
1 parent 6eba561 commit 8c82f1c

File tree

1 file changed

+41
-12
lines changed

1 file changed

+41
-12
lines changed

ocaml/xenopsd/lib/xenops_server.ml

Lines changed: 41 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,7 @@ type operation =
338338
| VM_suspend of (Vm.id * data)
339339
| VM_resume of (Vm.id * data)
340340
| VM_restore_vifs of Vm.id
341-
| VM_restore_devices of (Vm.id * bool)
341+
| VM_restore_devices of (Vm.id * bool * bool)
342342
| VM_migrate of vm_migrate_op
343343
| VM_receive_memory of vm_receive_op
344344
| VBD_hotplug of Vbd.id
@@ -1763,7 +1763,7 @@ let rec atomics_of_operation = function
17631763
serial "VIF.activate_and_plug" ~id
17641764
[VIF_set_active (vif.Vif.id, true); VIF_plug vif.Vif.id]
17651765
)
1766-
| VM_restore_devices (id, restore_vifs) ->
1766+
| VM_restore_devices (id, restore_vifs, migration) ->
17671767
let vbds_rw, vbds_ro = VBD_DB.vbds id |> vbd_plug_sets in
17681768
let vgpus = VGPU_DB.vgpus id in
17691769
let pcis = PCI_DB.pcis id |> pci_plug_order in
@@ -1777,10 +1777,23 @@ let rec atomics_of_operation = function
17771777
[VBD_set_active (vbd.Vbd.id, true); vbd_plug vbd.Vbd.id]
17781778
)
17791779
in
1780+
let activate_vbds typ vbds =
1781+
let name_multi = Printf.sprintf "VBDs.activate %s" typ in
1782+
parallel name_multi ~id
1783+
(List.map (fun vbd -> VBD_activate vbd.Vbd.id) vbds)
1784+
in
1785+
let prep_vbds =
1786+
if !xenopsd_vbd_plug_unplug_legacy || not migration then
1787+
plug_vbds
1788+
else
1789+
(* If plug is split into activate and attach, when migrating we don't
1790+
need to attach here as we attached outside of the VM downtime *)
1791+
activate_vbds
1792+
in
17801793
[
17811794
(* rw vbds must be plugged before ro vbds, see vbd_plug_sets *)
1782-
plug_vbds "RW" vbds_rw
1783-
; plug_vbds "RO" vbds_ro
1795+
prep_vbds "RW" vbds_rw
1796+
; prep_vbds "RO" vbds_ro
17841797
; (if restore_vifs then atomics_of_operation (VM_restore_vifs id) else [])
17851798
; (* Nvidia SRIOV PCI devices have already been plugged *)
17861799
parallel_map "VGPUs.activate" ~id vgpus (fun vgpu ->
@@ -1897,7 +1910,7 @@ let rec atomics_of_operation = function
18971910
]
18981911
; vgpu_start_operations
18991912
; [VM_restore (id, data, vgpu_data)]
1900-
; atomics_of_operation (VM_restore_devices (id, true))
1913+
; atomics_of_operation (VM_restore_devices (id, true, false))
19011914
; [
19021915
(* At this point the domain is considered survivable. *)
19031916
VM_set_domain_action_request (id, None)
@@ -2573,7 +2586,7 @@ and trigger_cleanup_after_failure op t =
25732586
| VM_shutdown (id, _)
25742587
| VM_suspend (id, _)
25752588
| VM_restore_vifs id
2576-
| VM_restore_devices (id, _)
2589+
| VM_restore_devices (id, _, _)
25772590
| VM_resume (id, _) ->
25782591
immediate_operation dbg id (VM_check_state id)
25792592
| VM_receive_memory {vmr_id= id; vmr_final_id= final_id; _} ->
@@ -2696,9 +2709,9 @@ and perform_exn ?result (op : operation) (t : Xenops_task.task_handle) : unit =
26962709
| VM_restore_vifs id ->
26972710
debug "VM_restore_vifs %s" id ;
26982711
perform_atomics (atomics_of_operation op) t
2699-
| VM_restore_devices (id, restore_vifs) ->
2712+
| VM_restore_devices (id, restore_vifs, migration) ->
27002713
(* XXX: this is delayed due to the 'attach'/'activate' behaviour *)
2701-
debug "VM_restore_devices %s %b" id restore_vifs ;
2714+
debug "VM_restore_devices %s %b %b" id restore_vifs migration ;
27022715
perform_atomics (atomics_of_operation op) t
27032716
| VM_resume (id, _data) ->
27042717
debug "VM.resume %s" id ;
@@ -3022,11 +3035,27 @@ and perform_exn ?result (op : operation) (t : Xenops_task.task_handle) : unit =
30223035
( try
30233036
let no_sharept = VGPU_DB.vgpus id |> List.exists is_no_sharept in
30243037
debug "VM %s no_sharept=%b (%s)" id no_sharept __LOC__ ;
3038+
let early_attach =
3039+
if !xenopsd_vbd_plug_unplug_legacy then
3040+
[]
3041+
else
3042+
(* If plug is split into activate and attach, we can attach
3043+
early so that it is outside of the VM downtime *)
3044+
parallel_map "VBDs.set_active_and_attach" ~id (VBD_DB.vbds id)
3045+
(fun vbd ->
3046+
serial "VBD.set_active_and_attach" ~id
3047+
[
3048+
VBD_set_active (vbd.Vbd.id, true)
3049+
; VBD_attach vbd.Vbd.id
3050+
]
3051+
)
3052+
in
30253053
perform_atomics
30263054
([VM_create (id, Some memory_limit, Some final_id, no_sharept)]
3027-
@ (* Perform as many operations as possible on the destination
3028-
domain before pausing the original domain *)
3029-
atomics_of_operation (VM_restore_vifs id)
3055+
(* Perform as many operations as possible on the destination
3056+
domain before pausing the original domain *)
3057+
@ atomics_of_operation (VM_restore_vifs id)
3058+
@ early_attach
30303059
)
30313060
t ;
30323061
Handshake.send s Handshake.Success
@@ -3142,7 +3171,7 @@ and perform_exn ?result (op : operation) (t : Xenops_task.task_handle) : unit =
31423171
) ;
31433172
debug "VM.receive_memory: restoring remaining devices and unpausing" ;
31443173
perform_atomics
3145-
(atomics_of_operation (VM_restore_devices (final_id, false))
3174+
(atomics_of_operation (VM_restore_devices (final_id, false, true))
31463175
@ [
31473176
VM_unpause final_id
31483177
; VM_set_domain_action_request (final_id, None)

0 commit comments

Comments
 (0)