Skip to content

Tests for process tree structure #64

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Aug 16, 2017
6 changes: 4 additions & 2 deletions example/docker-helper/docker_helper/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from .helper import DockerHelper
from .utils import list_container_processes, output_lines, wait_for_log_line
from .utils import (
list_container_processes, output_lines, wait_for_log_line,
build_process_tree)

__all__ = ['DockerHelper', 'list_container_processes', 'output_lines',
'wait_for_log_line']
'wait_for_log_line', 'build_process_tree']
36 changes: 36 additions & 0 deletions example/docker-helper/docker_helper/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def output_lines(raw_output, encoding='utf-8'):
@attr.s
class PsRow(object):
pid = attr.ib()
ppid = attr.ib()
ruser = attr.ib()
args = attr.ib()

Expand All @@ -47,6 +48,15 @@ def columns(cls):
return [a.name for a in attr.fields(cls)]


@attr.s
class PsTree(object):
    """
    A node in a process tree.

    Each node pairs one process row with the list of nodes for the processes
    parented by it.
    """
    # The row describing this node's own process.
    row = attr.ib()
    # Child nodes; each is expected to provide its own ``count()``.
    children = attr.ib()

    def count(self):
        """
        Count the nodes in this subtree.

        :return: 1 for this node plus the ``count()`` of every child
        """
        total = 1
        for child in self.children:
            total += child.count()
        return total


def list_container_processes(container):
"""
List the processes running inside a container.
Expand Down Expand Up @@ -80,3 +90,29 @@ def list_container_processes(container):
ps_rows = [row for row in ps_rows if row.args != cmd_string]

return ps_rows


def _build_process_subtree(ps_rows, ps_tree):
    """
    Recursively attach descendants to a tree node, in place.

    Every row whose ``ppid`` equals the ``pid`` of the node's own row becomes
    a new child node, and the same search is then repeated for that child.

    :param ps_rows: the full list of rows to search for children
    :param ps_tree: the PsTree node whose ``children`` list is appended to
    """
    parent_pid = ps_tree.row.pid
    for candidate in ps_rows:
        if candidate.ppid != parent_pid:
            continue
        subtree = PsTree(row=candidate, children=[])
        ps_tree.children.append(subtree)
        # Descend straight away so the child is fully populated before the
        # next sibling is considered.
        _build_process_subtree(ps_rows, subtree)


def build_process_tree(ps_rows):
    """
    Build a tree structure from a list of PsRow objects.

    The root is the single row whose ``ppid`` is ``'0'``; all other rows are
    attached beneath it by matching each row's ``ppid`` to a parent's ``pid``.

    :param ps_rows: a list of PsRow objects
    :return: a PsTree object
    :raises AssertionError:
        if there is not exactly one row with ``ppid == '0'``, or if the
        finished tree does not contain as many nodes as there are rows
    """
    root = None
    for candidate in ps_rows:
        if candidate.ppid != '0':
            continue
        # A second root candidate means the rows do not form a single tree.
        assert root is None
        root = PsTree(row=candidate, children=[])
    assert root is not None

    _build_process_subtree(ps_rows, root)

    # Every row must have ended up somewhere in the tree.
    assert root.count() == len(ps_rows)
    return root
241 changes: 145 additions & 96 deletions example/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@
from testtools.matchers import (
AfterPreprocessing as After, Contains, Equals, GreaterThan, HasLength,
LessThan, MatchesAll, MatchesAny, MatchesDict, MatchesListwise,
MatchesRegex, MatchesSetwise, MatchesStructure, Not)
MatchesRegex, MatchesSetwise, MatchesStructure, Not, Mismatch)

from docker_helper import list_container_processes, output_lines
from docker_helper import (
list_container_processes, output_lines, build_process_tree)
from fixtures import * # noqa: We import these so pytest can find them.


Expand All @@ -35,30 +36,53 @@ def filter_ldconfig_process(ps_rows):
if not (row.ruser == 'django' and 'ldconfig' in row.args)]


def assert_tini_pid_1(ps_row, cmd):
    """
    Assert that a process row is the tini init process.

    The row must have pid '1', ruser 'root' and args equal to the tini
    command line that wraps ``django-entrypoint.sh`` followed by ``cmd``.

    :param ps_row: the process row to check
    :param cmd: the command expected after ``django-entrypoint.sh``
    """
    expected_args = 'tini -- django-entrypoint.sh {}'.format(cmd)
    tini_matcher = MatchesStructure.byEquality(
        pid='1', ruser='root', args=expected_args)
    assert_that(ps_row, tini_matcher)


def matches_attributes_values(attributes, values):
    """
    Returns a matcher that checks, by equality, the values of several
    attributes of an object.

    :param attributes: the attribute names to check
    :param values: the expected values, paired positionally with the names
    """
    expected = dict(zip(attributes, values))
    return MatchesStructure.byEquality(**expected)


def matches_ruser_args_unordered(*expected_values):
    """
    Returns a matcher that, given a list of PsRow objects, ensures that objects
    are present that have 'ruser' and 'args' values that match those given.

    :param expected_values:
        one ``(ruser, args)`` pair per expected row; the rows may appear in
        any order
    """
    row_matchers = [
        matches_attributes_values(['ruser', 'args'], pair)
        for pair in expected_values
    ]
    return MatchesSetwise(*row_matchers)
class PsTreeMismatch(Mismatch):
    """
    Mismatch describing why a process tree node failed to match.

    It carries the expected row fields and child count, plus the mismatch (if
    any) for the node's own fields and the mismatch (if any) for its children.
    """

    def __init__(self, row_fields, child_count, fields_mm, children_mm):
        """
        :param row_fields: dict of the expected row field names to values
        :param child_count: the number of children that were expected
        :param fields_mm:
            mismatch for the row fields, or None; must expose ``mismatches``
        :param children_mm:
            mismatch for the children, or None; must expose ``describe()``
        """
        self.row_fields = row_fields
        self.child_count = child_count
        self.fields_mm = fields_mm
        self.children_mm = children_mm

    def describe(self):
        """
        Render a multi-line description of the mismatch.

        :return:
            a header naming the expected fields and child count, then the
            indented field mismatches and child mismatches, closed by ``]``
        """
        field_text = ', '.join(
            '{}={!r}'.format(name, value)
            for name, value in sorted(self.row_fields.items()))
        # "1 child" but "0 children" / "2 children".
        plural = 'ren' if self.child_count != 1 else ''
        lines = ['PsTree({} with {} child{}) mismatch: ['.format(
            field_text, self.child_count, plural)]

        if self.fields_mm is not None:
            for mismatch in self.fields_mm.mismatches:
                # splitlines() already drops the line terminators.
                lines.extend(
                    ' ' + text for text in mismatch.describe().splitlines())

        if self.children_mm is not None:
            lines.append(' mismatches in children:')
            lines.extend(
                ' ' + text
                for text in self.children_mm.describe().splitlines())

        lines.append(']')
        return '\n'.join(lines)


class MatchesPsTree(object):
def __init__(self, ruser, args, ppid=None, pid=None, children=()):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it still make sense to have ppid here?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For completeness, I think it does. There's no reason not to have it, and people might want to examine subtrees and assert that the subtree's root has the right ppid.

self.row_fields = {'ruser': ruser, 'args': args}
if ppid is not None:
self.row_fields['ppid'] = ppid
if pid is not None:
self.row_fields['pid'] = pid
self.children = children

def __str__(self):
rfs = ['{}={!r}'.format(k, v)
for k, v in sorted(self.row_fields.items())]
return '{}({}, children={})'.format(
self.__class__.__name__, ', '.join(rfs), str(self.children))

def match(self, value):
    """
    Match a tree node against the expected row fields and child matchers.

    :param value: the node to check; its ``row`` and ``children`` are read
    :return:
        None when both the row fields and the children match, otherwise a
        PsTreeMismatch carrying whichever mismatches occurred
    """
    row_matcher = MatchesStructure.byEquality(**self.row_fields)
    fields_mm = row_matcher.match(value.row)
    # Children are compared without regard to order.
    children_mm = MatchesSetwise(*self.children).match(value.children)
    if fields_mm is None and children_mm is None:
        return None
    return PsTreeMismatch(
        self.row_fields, len(self.children), fields_mm, children_mm)


class TestWeb(object):
Expand All @@ -67,69 +91,88 @@ def test_expected_processes(self, web_only_container):
When the container is running, there should be 5 running processes:
tini, the Nginx master and worker, and the Gunicorn master and worker.
"""
ps_data = list_container_processes(web_only_container)
ps_rows = filter_ldconfig_process(
list_container_processes(web_only_container))

# Sometimes it takes a little while for the processes to settle so try
# a few times with a delay in between.
retries = 3
delay = 0.5
for _ in range(retries):
if len(ps_data) == 5:
if len(ps_rows) == 5:
break
time.sleep(delay)
ps_data = list_container_processes(web_only_container)

assert_tini_pid_1(ps_data.pop(0), 'mysite.wsgi:application')

ps_data = filter_ldconfig_process(ps_data)

# The next processes we have no control over the start order or PIDs...
assert_that(ps_data, matches_ruser_args_unordered(
('root', 'nginx: master process nginx -g daemon off;'),
('nginx', 'nginx: worker process'),
('django',
'/usr/local/bin/python /usr/local/bin/gunicorn '
'mysite.wsgi:application --pid /var/run/gunicorn/gunicorn.pid '
'--bind unix:/var/run/gunicorn/gunicorn.sock --umask 0117'),
# No obvious way to differentiate Gunicorn master from worker
('django',
'/usr/local/bin/python /usr/local/bin/gunicorn '
'mysite.wsgi:application --pid /var/run/gunicorn/gunicorn.pid '
'--bind unix:/var/run/gunicorn/gunicorn.sock --umask 0117'),
))
ps_rows = filter_ldconfig_process(
list_container_processes(web_only_container))

ps_tree = build_process_tree(ps_rows)

tini_args = 'tini -- django-entrypoint.sh mysite.wsgi:application'
gunicorn_master_args = (
'/usr/local/bin/python /usr/local/bin/gunicorn '
'mysite.wsgi:application --pid /var/run/gunicorn/gunicorn.pid '
'--bind unix:/var/run/gunicorn/gunicorn.sock --umask 0117')
gunicorn_worker_args = (
'/usr/local/bin/python /usr/local/bin/gunicorn '
'mysite.wsgi:application --pid /var/run/gunicorn/gunicorn.pid '
'--bind unix:/var/run/gunicorn/gunicorn.sock --umask 0117')
nginx_master_args = 'nginx: master process nginx -g daemon off;'
nginx_worker_args = 'nginx: worker process'

assert_that(
ps_tree,
MatchesPsTree('root', tini_args, pid='1', children=[
MatchesPsTree('django', gunicorn_master_args, children=[
MatchesPsTree('django', gunicorn_worker_args),
# FIXME: Nginx should not be parented by Gunicorn
MatchesPsTree('root', nginx_master_args, children=[
MatchesPsTree('nginx', nginx_worker_args),
]),
]),
]))

def test_expected_processes_single_container(self, single_container):
"""
When the container is running, there should be 7 running processes:
tini, the Nginx master and worker, the Gunicorn master and worker, and
the Celery worker ("solo", non-forking) and beat processes.
"""
ps_data = list_container_processes(single_container)

assert_tini_pid_1(ps_data.pop(0), 'mysite.wsgi:application')

ps_data = filter_ldconfig_process(ps_data)

# The next processes we have no control over the start order or PIDs...
assert_that(ps_data, matches_ruser_args_unordered(
('root', 'nginx: master process nginx -g daemon off;'),
('nginx', 'nginx: worker process'),
('django',
'/usr/local/bin/python /usr/local/bin/gunicorn '
'mysite.wsgi:application --pid /var/run/gunicorn/gunicorn.pid '
'--bind unix:/var/run/gunicorn/gunicorn.sock --umask 0117'),
# No obvious way to differentiate Gunicorn master from worker
('django',
'/usr/local/bin/python /usr/local/bin/gunicorn '
'mysite.wsgi:application --pid /var/run/gunicorn/gunicorn.pid '
'--bind unix:/var/run/gunicorn/gunicorn.sock --umask 0117'),
('django',
'/usr/local/bin/python /usr/local/bin/celery worker --pool=solo '
'--pidfile worker.pid --concurrency 1'),
('django',
'/usr/local/bin/python /usr/local/bin/celery beat --pidfile '
'beat.pid'),
))
ps_rows = list_container_processes(single_container)
ps_tree = build_process_tree(ps_rows)

tini_args = 'tini -- django-entrypoint.sh mysite.wsgi:application'
gunicorn_master_args = (
'/usr/local/bin/python /usr/local/bin/gunicorn '
'mysite.wsgi:application --pid /var/run/gunicorn/gunicorn.pid '
'--bind unix:/var/run/gunicorn/gunicorn.sock --umask 0117')
gunicorn_worker_args = (
'/usr/local/bin/python /usr/local/bin/gunicorn '
'mysite.wsgi:application --pid /var/run/gunicorn/gunicorn.pid '
'--bind unix:/var/run/gunicorn/gunicorn.sock --umask 0117')
nginx_master_args = 'nginx: master process nginx -g daemon off;'
nginx_worker_args = 'nginx: worker process'
celery_worker_args = (
'/usr/local/bin/python /usr/local/bin/celery worker --pool=solo '
'--pidfile worker.pid --concurrency 1')
celery_beat_args = (
'/usr/local/bin/python /usr/local/bin/celery beat --pidfile '
'beat.pid')

assert_that(
ps_tree,
MatchesPsTree('root', tini_args, pid='1', children=[
MatchesPsTree('django', gunicorn_master_args, children=[
MatchesPsTree('django', gunicorn_worker_args),
# FIXME: Celery worker should not be parented by Gunicorn
MatchesPsTree('django', celery_worker_args),
# FIXME: Celery beat should not be parented by Gunicorn
MatchesPsTree('django', celery_beat_args),
# FIXME: Nginx should not be parented by Gunicorn
MatchesPsTree('root', nginx_master_args, children=[
MatchesPsTree('nginx', nginx_worker_args),
]),
]),
]))

@pytest.mark.clean_db
def test_database_tables_created(self, db_container, web_container):
Expand Down Expand Up @@ -372,20 +415,24 @@ def test_expected_processes(self, worker_only_container):
When the container is running, there should be 3 running processes:
tini, and the Celery worker master and worker.
"""
ps_data = list_container_processes(worker_only_container)

assert_tini_pid_1(ps_data.pop(0), 'celery worker')

# The next processes we have no control over the start order or PIDs...
assert_that(ps_data, matches_ruser_args_unordered(
('django',
'/usr/local/bin/python /usr/local/bin/celery worker '
'--concurrency 1'),
# No obvious way to differentiate Celery master from worker
('django',
'/usr/local/bin/python /usr/local/bin/celery worker '
'--concurrency 1'),
))
ps_rows = list_container_processes(worker_only_container)
ps_tree = build_process_tree(ps_rows)

tini_args = 'tini -- django-entrypoint.sh celery worker'
celery_master_args = (
'/usr/local/bin/python /usr/local/bin/celery worker '
'--concurrency 1')
celery_worker_args = (
'/usr/local/bin/python /usr/local/bin/celery worker '
'--concurrency 1')

assert_that(
ps_tree,
MatchesPsTree('root', tini_args, pid='1', children=[
MatchesPsTree('django', celery_master_args, children=[
MatchesPsTree('django', celery_worker_args),
]),
]))

@pytest.mark.clean_amqp
def test_amqp_queues_created(self, amqp_container, worker_container):
Expand All @@ -411,12 +458,14 @@ def test_expected_processes(self, beat_only_container):
When the container is running, there should be 2 running processes:
tini, and the Celery beat process.
"""
ps_data = list_container_processes(beat_only_container)
ps_rows = list_container_processes(beat_only_container)
ps_tree = build_process_tree(ps_rows)

assert_that(ps_data, HasLength(2))
assert_tini_pid_1(ps_data[0], 'celery beat')
# We don't know what PID we will get, so don't check it
assert_that(ps_data[1], MatchesStructure.byEquality(
ruser='django',
args='/usr/local/bin/python /usr/local/bin/celery beat',
))
tini_args = 'tini -- django-entrypoint.sh celery beat'
celery_beat_args = '/usr/local/bin/python /usr/local/bin/celery beat'

assert_that(
ps_tree,
MatchesPsTree('root', tini_args, pid='1', children=[
MatchesPsTree('django', celery_beat_args),
]))