-
Notifications
You must be signed in to change notification settings - Fork 315
Add checkpoint/restore functionality to nsinit and libcontainer #204
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
package namespaces | ||
|
||
import ( | ||
"fmt" | ||
"log" | ||
"os/exec" | ||
"strconv" | ||
|
||
"github.com/docker/libcontainer" | ||
) | ||
|
||
// Names of the log files handed to criu through its -o option.
// NOTE(review): runCriu joins these names with the image directory when
// pointing the user at the log, so they are presumably resolved relative
// to the -D image directory -- confirm against criu's documentation.
const ( | ||
// CheckpointLog is the log file name used for "criu dump".
CheckpointLog = "dump.log" | ||
// RestoreLog is the log file name used for "criu restore".
RestoreLog = "restore.log" | ||
) | ||
|
||
// Checkpoint the specified container using the criu(8) utility. | ||
func Checkpoint(criuBinary string, container *libcontainer.Config, imageDir string, initPid int, verbose bool) error { | ||
// Prepare command line arguments. | ||
args := []string{ | ||
"dump", "-v4", | ||
"-D", imageDir, "-o", CheckpointLog, | ||
"--root", container.RootFs, | ||
"--manage-cgroups", "--evasive-devices", | ||
"-t", strconv.Itoa(initPid), | ||
} | ||
for _, mountpoint := range container.MountConfig.Mounts { | ||
args = append(args, "--ext-mount-map", fmt.Sprintf("%s:%s", mountpoint.Destination, mountpoint.Destination)) | ||
} | ||
|
||
// Execute criu to checkpoint. | ||
if verbose { | ||
log.Printf("Running CRIU: %s %v\n", criuBinary, args) | ||
} | ||
output, err := exec.Command(criuBinary, args...).CombinedOutput() | ||
if verbose && len(output) > 0 { | ||
log.Printf("%s\n", output) | ||
} | ||
return err | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
package namespaces | ||
|
||
import ( | ||
"fmt" | ||
"log" | ||
"os/exec" | ||
|
||
"github.com/docker/libcontainer" | ||
) | ||
|
||
// Restore the specified container (previously checkpointed) using the | ||
// criu(8) utility. | ||
func Restore(criuBinary string, container *libcontainer.Config, imageDir string, verbose bool) error { | ||
// Prepare command line arguments. | ||
args := []string{ | ||
"restore", "-d", "-v4", | ||
"-D", imageDir, "-o", RestoreLog, | ||
"--root", container.RootFs, | ||
"--manage-cgroups", "--evasive-devices", | ||
} | ||
for _, mountpoint := range container.MountConfig.Mounts { | ||
args = append(args, "--ext-mount-map", fmt.Sprintf("%s:%s", mountpoint.Destination, mountpoint.Source)) | ||
} | ||
|
||
// Execute criu to restore. | ||
if verbose { | ||
log.Printf("Running CRIU: %s %v\n", criuBinary, args) | ||
} | ||
output, err := exec.Command(criuBinary, args...).CombinedOutput() | ||
if verbose && len(output) > 0 { | ||
log.Printf("%s\n", output) | ||
} | ||
return err | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
package main | ||
|
||
import ( | ||
"log" | ||
"os" | ||
"path/filepath" | ||
|
||
"github.com/codegangsta/cli" | ||
"github.com/docker/libcontainer" | ||
) | ||
|
||
// checkpointDescription is the long-form help text attached to the
// "checkpoint" command through its Description field. It documents how the
// container ID is resolved, where the criu image files are stored, and the
// environment variables the command consults.
var checkpointDescription string = `Checkpoint a process tree running in a container with the criu(8) | ||
utility. | ||
|
||
The container ID is determined by one of the following three methods | ||
in the order described: | ||
|
||
1. Command line argument | ||
2. Last pathname component of the data_path environment variable | ||
3. Last pathname component of the current working directory | ||
|
||
If method 1 is used, the libcontainer's home directory must be | ||
specified in the LIBCONTAINER_DIR environment variable. | ||
|
||
Nsinit expects to find the container's container.json and state.json | ||
files in the container subdirectory of the libcontainer's home | ||
directory. | ||
|
||
The user has to specify an image home directory for criu(8) by setting | ||
the CRIU_IMG_HOME_DIR environment variable. Within this directory, | ||
a container's image files will be saved in <container_id>/criu_img | ||
subdirectory. | ||
|
||
Returns 0 on success. On error, prints an error message and returns | ||
a non-zero code. | ||
|
||
ENVIRONMENT: | ||
CRIU_BINARY criu binary to execute, if not set "criu" is assumed | ||
CRIU_IMG_HOME_DIR criu image home directory | ||
LIBCONTAINER_DIR libcontainer home directory | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: It is not clear what this should point to from the description. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The EXAMPLE: section right below this section shows the typical value for LIBCONTAINER_DIR. It makes life easy for the user because its value doesn't change from container to container. In contract, data_path is set to the actual container.json file and the user has to repeatedly set it for each container. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A slight correction to my comment above: data_path is set to the directory where container.json file is. The pathname to this directory includes the container ID which changes from container to container. So the preferred way of doing checkpoint and restore (least amount of typing) is to set LIBCONTAINER_DIR once and provide container ID on nsinit command line. In this case, the user can be in any directory. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. awesome. |
||
data_path directory pathname of container.json (no trailing /) | ||
log pathname where to log | ||
|
||
EXAMPLE: | ||
# export LIBCONTAINER_DIR=/var/lib/docker/execdriver/native | ||
# export CRIU_IMG_HOME_DIR=/var/lib/docker/containers | ||
# docker ps -lq --no-trunc | ||
281ab0098269e515e3f81661c3cd6272abb640cf352efc64b3b98cc2470f3944 | ||
# nsinit checkpoint 281ab0098269e515e3f81661c3cd6272abb640cf352efc64b3b98cc2470f3944 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right now nsinit works off of the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Checkpoint does actually work off of cwd (without the need to set LIBCONTAINER_DIR or specifying container ID), but restore will not work because Docker removes the container directory after checkpoint (container has exited). IOW, there is no cwd for restore. |
||
checkpoint succeeded | ||
# ` | ||
|
||
var checkpointCommand = cli.Command{ | ||
Name: "checkpoint", | ||
Usage: "checkpoint a container", | ||
Action: checkpointAction, | ||
Description: checkpointDescription, | ||
Flags: []cli.Flag{ | ||
cli.BoolFlag{Name: "verbose, v", Usage: "enable verbose mode"}, | ||
}, | ||
} | ||
|
||
func checkpointAction(context *cli.Context) { | ||
if len(context.Args()) > 1 { | ||
log.Fatal("Too many command line arguments\n") | ||
} | ||
|
||
// Get container ID and set dataPath if needed. | ||
containerId := getContainerId(context) | ||
|
||
// Load the container.json file to verify that we have | ||
// a valid container. | ||
container, err := loadConfig() | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
|
||
// Get the init PID of the process tree from state.json. | ||
state, err := libcontainer.GetState(dataPath) | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
initPid := state.InitPid | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we want to verify we didn't get a 0? Which would be an unitialized InitPid There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We do. If we go to CRIU RPC with pid == 0 it will dump the requester :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added a check against initPid of 0. If/when we use CRIU RPC, will revisit the issue. |
||
if initPid == 0 { | ||
log.Fatal("Container's init PID is uninitialized\n") | ||
} | ||
|
||
// Create an image directory for this container (which | ||
// may already exist from a previous checkpoint). | ||
imageDir := getImageDir(containerId) | ||
err = os.MkdirAll(imageDir, 0700) | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
|
||
// Copy container.json in the criu image directory for | ||
// later use during restore. | ||
copyFile(filepath.Join(dataPath, "container.json"), filepath.Join(imageDir, "container.json")) | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
|
||
// Run criu and exit on error because our caller doesn't take return value. | ||
err = runCriu(context, container, containerId, imageDir, initPid) | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
package main | ||
|
||
import ( | ||
"fmt" | ||
"log" | ||
"os" | ||
"path/filepath" | ||
|
||
"github.com/codegangsta/cli" | ||
"github.com/docker/libcontainer" | ||
"github.com/docker/libcontainer/namespaces" | ||
) | ||
|
||
// Refer to the description message at the beginning of this file to | ||
// see how we get the container ID. | ||
func getContainerId(context *cli.Context) string { | ||
if len(context.Args()) == 1 { | ||
// dataPath may have been initialized from data_path if | ||
// in environment (if it was set). But if a container | ||
// ID is specified on the command line, dataPath will be | ||
// reinitialized here from LIBCONTAINER_DIR environment | ||
// variable and the specified container ID. | ||
containerId := context.Args()[0] | ||
if libcontainerDir == "" { | ||
log.Fatal("LIBCONTAINER_DIR not set") | ||
} | ||
dataPath = filepath.Join(libcontainerDir, containerId) | ||
return containerId | ||
} | ||
|
||
if dataPath == "" { | ||
// If the container has been checkpointed, the directory | ||
// where its container.json file existed has been removed. | ||
// So for restore, we cannot get the container ID from | ||
// the cwd pathname. | ||
if context.Command.Name == "restore" { | ||
log.Fatal("Specify container ID as an argument or set data_path env var") | ||
} | ||
|
||
cwd, err := os.Getwd() | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
dataPath = cwd | ||
} | ||
|
||
// Extract container ID from the pathname. | ||
containerId := filepath.Base(dataPath) | ||
if containerId == "" { | ||
log.Fatal("Cannot determine container ID") | ||
} | ||
|
||
return containerId | ||
} | ||
|
||
// getImageDir builds the directory in which criu saves and later looks up
// the image files of the given container:
// $CRIU_IMG_HOME_DIR/<containerId>/criu_img.
//
// The process is terminated through log.Fatal when CRIU_IMG_HOME_DIR is
// unset or empty.
func getImageDir(containerId string) string {
	home, _ := os.LookupEnv("CRIU_IMG_HOME_DIR")
	if len(home) == 0 {
		// An unset variable and an empty one are treated alike.
		log.Fatal("CRIU_IMG_HOME_DIR not set")
	}

	parts := []string{home, containerId, "criu_img"}
	return filepath.Join(parts...)
}
|
||
// Common code for checkpoint and restore. | ||
func runCriu(context *cli.Context, container *libcontainer.Config, containerId, imageDir string, initPid int) error { | ||
verbose := context.Bool("verbose") | ||
cmd := context.Command.Name | ||
|
||
criuBinary := os.Getenv("CRIU_BINARY") | ||
if criuBinary == "" { | ||
criuBinary = "criu" | ||
} | ||
|
||
var err error | ||
if cmd == "checkpoint" { | ||
err = namespaces.Checkpoint(criuBinary, container, imageDir, initPid, verbose) | ||
} else { | ||
err = namespaces.Restore(criuBinary, container, imageDir, verbose) | ||
} | ||
if !verbose { | ||
return err | ||
} | ||
|
||
if err == nil { | ||
fmt.Printf("%s succeeded\n", cmd) | ||
} else { | ||
fmt.Printf("%s failed: %s\n", cmd, err) | ||
var logFile string | ||
if cmd == "checkpoint" { | ||
logFile = filepath.Join(imageDir, namespaces.CheckpointLog) | ||
} else { | ||
logFile = filepath.Join(imageDir, namespaces.RestoreLog) | ||
} | ||
fmt.Printf("Cause of failure may be in %s\n", logFile) | ||
} | ||
return err | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you please add some comments to the exported functions?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.